{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9998726033505319, "eval_steps": 500, "global_step": 15698, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 1.736969857569419, "learning_rate": 2.1231422505307857e-08, "loss": 0.6756, "step": 1 }, { "epoch": 0.0, "grad_norm": 2.037470655132517, "learning_rate": 4.246284501061571e-08, "loss": 0.8562, "step": 2 }, { "epoch": 0.0, "grad_norm": 1.9218059224199386, "learning_rate": 6.369426751592358e-08, "loss": 0.8068, "step": 3 }, { "epoch": 0.0, "grad_norm": 1.619998245287453, "learning_rate": 8.492569002123143e-08, "loss": 0.6641, "step": 4 }, { "epoch": 0.0, "grad_norm": 1.7552286831643211, "learning_rate": 1.0615711252653928e-07, "loss": 0.6532, "step": 5 }, { "epoch": 0.0, "grad_norm": 1.725938522012392, "learning_rate": 1.2738853503184715e-07, "loss": 0.6431, "step": 6 }, { "epoch": 0.0, "grad_norm": 1.8840277525393838, "learning_rate": 1.4861995753715502e-07, "loss": 0.7829, "step": 7 }, { "epoch": 0.0, "grad_norm": 1.787761961910525, "learning_rate": 1.6985138004246285e-07, "loss": 0.6583, "step": 8 }, { "epoch": 0.0, "grad_norm": 1.856382702441212, "learning_rate": 1.9108280254777072e-07, "loss": 0.8299, "step": 9 }, { "epoch": 0.0, "grad_norm": 1.9440185036397162, "learning_rate": 2.1231422505307855e-07, "loss": 0.7814, "step": 10 }, { "epoch": 0.0, "grad_norm": 2.042243550306094, "learning_rate": 2.3354564755838642e-07, "loss": 0.8368, "step": 11 }, { "epoch": 0.0, "grad_norm": 1.7996395106043221, "learning_rate": 2.547770700636943e-07, "loss": 0.638, "step": 12 }, { "epoch": 0.0, "grad_norm": 1.869579841578725, "learning_rate": 2.7600849256900214e-07, "loss": 0.8517, "step": 13 }, { "epoch": 0.0, "grad_norm": 1.9095070756184216, "learning_rate": 2.9723991507431003e-07, "loss": 0.8669, "step": 14 }, { "epoch": 0.0, "grad_norm": 1.7011520507056614, "learning_rate": 3.1847133757961787e-07, "loss": 0.6481, "step": 15 }, { "epoch": 0.0, "grad_norm": 1.72656352612305, "learning_rate": 3.397027600849257e-07, "loss": 0.6676, "step": 16 }, { "epoch": 0.0, "grad_norm": 1.9850011594794108, "learning_rate": 3.6093418259023354e-07, "loss": 0.8064, "step": 17 }, { "epoch": 0.0, "grad_norm": 1.9961052081277577, "learning_rate": 3.8216560509554143e-07, "loss": 0.8623, "step": 18 }, { "epoch": 0.0, "grad_norm": 1.5419972208334418, "learning_rate": 4.0339702760084927e-07, "loss": 0.6767, "step": 19 }, { "epoch": 0.0, "grad_norm": 2.1838794830736905, "learning_rate": 4.246284501061571e-07, "loss": 0.8244, "step": 20 }, { "epoch": 0.0, "grad_norm": 1.9317232935018747, "learning_rate": 4.45859872611465e-07, "loss": 0.852, "step": 21 }, { "epoch": 0.0, "grad_norm": 1.7085361334482871, "learning_rate": 4.6709129511677283e-07, "loss": 0.657, "step": 22 }, { "epoch": 0.0, "grad_norm": 1.8870619473509618, "learning_rate": 4.883227176220808e-07, "loss": 0.8576, "step": 23 }, { "epoch": 0.0, "grad_norm": 1.5299893741179431, "learning_rate": 5.095541401273886e-07, "loss": 0.6873, "step": 24 }, { "epoch": 0.0, "grad_norm": 1.4250924984224744, "learning_rate": 5.307855626326964e-07, "loss": 0.7369, "step": 25 }, { "epoch": 0.0, "grad_norm": 1.7077717936168544, "learning_rate": 5.520169851380043e-07, "loss": 0.815, "step": 26 }, { "epoch": 0.0, "grad_norm": 1.5997246770566833, "learning_rate": 5.732484076433121e-07, "loss": 0.7909, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.4952396677004043, "learning_rate": 5.944798301486201e-07, "loss": 0.6704, "step": 28 }, { "epoch": 0.0, "grad_norm": 1.3899327550048044, "learning_rate": 6.157112526539279e-07, "loss": 0.6509, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.061712463091723, "learning_rate": 6.369426751592357e-07, "loss": 0.85, "step": 30 }, { "epoch": 0.0, "grad_norm": 1.2744370728062344, "learning_rate": 6.581740976645436e-07, "loss": 0.6423, "step": 31 }, { "epoch": 0.0, "grad_norm": 1.3923514258885368, "learning_rate": 6.794055201698514e-07, "loss": 0.6969, "step": 32 }, { "epoch": 0.0, "grad_norm": 1.3692117861966746, "learning_rate": 7.006369426751592e-07, "loss": 0.6553, "step": 33 }, { "epoch": 0.0, "grad_norm": 1.096917613406094, "learning_rate": 7.218683651804671e-07, "loss": 0.5882, "step": 34 }, { "epoch": 0.0, "grad_norm": 1.23589373051139, "learning_rate": 7.43099787685775e-07, "loss": 0.6099, "step": 35 }, { "epoch": 0.0, "grad_norm": 1.457426821981949, "learning_rate": 7.643312101910829e-07, "loss": 0.8656, "step": 36 }, { "epoch": 0.0, "grad_norm": 1.3463087038344559, "learning_rate": 7.855626326963907e-07, "loss": 0.7443, "step": 37 }, { "epoch": 0.0, "grad_norm": 1.2354215190204334, "learning_rate": 8.067940552016985e-07, "loss": 0.6589, "step": 38 }, { "epoch": 0.0, "grad_norm": 1.3543046797702938, "learning_rate": 8.280254777070064e-07, "loss": 0.7884, "step": 39 }, { "epoch": 0.01, "grad_norm": 0.9978970211744808, "learning_rate": 8.492569002123142e-07, "loss": 0.646, "step": 40 }, { "epoch": 0.01, "grad_norm": 1.198186746771855, "learning_rate": 8.70488322717622e-07, "loss": 0.8023, "step": 41 }, { "epoch": 0.01, "grad_norm": 0.9222935238354153, "learning_rate": 8.9171974522293e-07, "loss": 0.7114, "step": 42 }, { "epoch": 0.01, "grad_norm": 0.7987781325353095, "learning_rate": 9.129511677282378e-07, "loss": 0.5467, "step": 43 }, { "epoch": 0.01, "grad_norm": 0.9526224105233752, "learning_rate": 9.341825902335457e-07, "loss": 0.5956, "step": 44 }, { "epoch": 0.01, "grad_norm": 1.4331820306922685, "learning_rate": 9.554140127388537e-07, "loss": 0.8467, "step": 45 }, { "epoch": 0.01, "grad_norm": 0.8270249691131161, "learning_rate": 9.766454352441615e-07, "loss": 0.6266, "step": 46 }, { "epoch": 0.01, "grad_norm": 0.8966224676323107, "learning_rate": 9.978768577494694e-07, "loss": 0.6487, "step": 47 }, { "epoch": 0.01, "grad_norm": 1.1974556124489195, "learning_rate": 1.0191082802547772e-06, "loss": 0.8419, "step": 48 }, { "epoch": 0.01, "grad_norm": 1.2300556285046356, "learning_rate": 1.040339702760085e-06, "loss": 0.7656, "step": 49 }, { "epoch": 0.01, "grad_norm": 1.088997811624343, "learning_rate": 1.0615711252653929e-06, "loss": 0.6085, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.1661197607504696, "learning_rate": 1.0828025477707007e-06, "loss": 0.7957, "step": 51 }, { "epoch": 0.01, "grad_norm": 0.8565785576485809, "learning_rate": 1.1040339702760086e-06, "loss": 0.5701, "step": 52 }, { "epoch": 0.01, "grad_norm": 0.8073850980562628, "learning_rate": 1.1252653927813164e-06, "loss": 0.6135, "step": 53 }, { "epoch": 0.01, "grad_norm": 1.1524349803707088, "learning_rate": 1.1464968152866242e-06, "loss": 0.7108, "step": 54 }, { "epoch": 0.01, "grad_norm": 0.8949503229969369, "learning_rate": 1.167728237791932e-06, "loss": 0.644, "step": 55 }, { "epoch": 0.01, "grad_norm": 1.1290110901645551, "learning_rate": 1.1889596602972401e-06, "loss": 0.7598, "step": 56 }, { "epoch": 0.01, "grad_norm": 1.2260917734247248, "learning_rate": 1.210191082802548e-06, "loss": 0.7976, "step": 57 }, { "epoch": 0.01, "grad_norm": 0.7484536915279122, "learning_rate": 1.2314225053078558e-06, "loss": 0.6533, "step": 58 }, { "epoch": 0.01, "grad_norm": 0.7018104782389608, "learning_rate": 1.2526539278131636e-06, "loss": 0.6257, "step": 59 }, { "epoch": 0.01, "grad_norm": 0.7496189310231463, "learning_rate": 1.2738853503184715e-06, "loss": 0.6163, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.0275015023681622, "learning_rate": 1.2951167728237793e-06, "loss": 0.7835, "step": 61 }, { "epoch": 0.01, "grad_norm": 0.8049740996421182, "learning_rate": 1.3163481953290871e-06, "loss": 0.5951, "step": 62 }, { "epoch": 0.01, "grad_norm": 0.769250999670833, "learning_rate": 1.337579617834395e-06, "loss": 0.6993, "step": 63 }, { "epoch": 0.01, "grad_norm": 0.7367855450371175, "learning_rate": 1.3588110403397028e-06, "loss": 0.5676, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.0538913639692464, "learning_rate": 1.3800424628450107e-06, "loss": 0.7137, "step": 65 }, { "epoch": 0.01, "grad_norm": 0.7836353836094159, "learning_rate": 1.4012738853503185e-06, "loss": 0.6458, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.2681341915414426, "learning_rate": 1.4225053078556263e-06, "loss": 0.7745, "step": 67 }, { "epoch": 0.01, "grad_norm": 0.7464717867766247, "learning_rate": 1.4437367303609342e-06, "loss": 0.6341, "step": 68 }, { "epoch": 0.01, "grad_norm": 0.7034007234369021, "learning_rate": 1.4649681528662422e-06, "loss": 0.5941, "step": 69 }, { "epoch": 0.01, "grad_norm": 0.9151609867840141, "learning_rate": 1.48619957537155e-06, "loss": 0.7036, "step": 70 }, { "epoch": 0.01, "grad_norm": 0.8111596902625513, "learning_rate": 1.5074309978768579e-06, "loss": 0.6062, "step": 71 }, { "epoch": 0.01, "grad_norm": 0.7583995233620089, "learning_rate": 1.5286624203821657e-06, "loss": 0.5653, "step": 72 }, { "epoch": 0.01, "grad_norm": 0.9962891903702115, "learning_rate": 1.5498938428874736e-06, "loss": 0.7762, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.1313304547472938, "learning_rate": 1.5711252653927814e-06, "loss": 0.7421, "step": 74 }, { "epoch": 0.01, "grad_norm": 1.008144720140337, "learning_rate": 1.5923566878980892e-06, "loss": 0.7189, "step": 75 }, { "epoch": 0.01, "grad_norm": 0.7780427717780393, "learning_rate": 1.613588110403397e-06, "loss": 0.5921, "step": 76 }, { "epoch": 0.01, "grad_norm": 0.8399311727829962, "learning_rate": 1.634819532908705e-06, "loss": 0.5997, "step": 77 }, { "epoch": 0.01, "grad_norm": 0.6927282906786248, "learning_rate": 1.6560509554140127e-06, "loss": 0.5671, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.2300098877330876, "learning_rate": 1.6772823779193206e-06, "loss": 0.7011, "step": 79 }, { "epoch": 0.01, "grad_norm": 0.9185988906241528, "learning_rate": 1.6985138004246284e-06, "loss": 0.6905, "step": 80 }, { "epoch": 0.01, "grad_norm": 0.9206959465561452, "learning_rate": 1.7197452229299363e-06, "loss": 0.7107, "step": 81 }, { "epoch": 0.01, "grad_norm": 0.6818779957755264, "learning_rate": 1.740976645435244e-06, "loss": 0.5602, "step": 82 }, { "epoch": 0.01, "grad_norm": 0.6498799279522858, "learning_rate": 1.7622080679405521e-06, "loss": 0.601, "step": 83 }, { "epoch": 0.01, "grad_norm": 0.888398773343403, "learning_rate": 1.78343949044586e-06, "loss": 0.6146, "step": 84 }, { "epoch": 0.01, "grad_norm": 0.962512792773225, "learning_rate": 1.8046709129511678e-06, "loss": 0.726, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.14206816976848, "learning_rate": 1.8259023354564756e-06, "loss": 0.7984, "step": 86 }, { "epoch": 0.01, "grad_norm": 0.8072834925084155, "learning_rate": 1.8471337579617835e-06, "loss": 0.7032, "step": 87 }, { "epoch": 0.01, "grad_norm": 0.947602536205962, "learning_rate": 1.8683651804670913e-06, "loss": 0.6548, "step": 88 }, { "epoch": 0.01, "grad_norm": 0.9143970421375164, "learning_rate": 1.8895966029723994e-06, "loss": 0.7302, "step": 89 }, { "epoch": 0.01, "grad_norm": 0.674175642032997, "learning_rate": 1.9108280254777074e-06, "loss": 0.5131, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.7886837529805113, "learning_rate": 1.9320594479830153e-06, "loss": 0.7306, "step": 91 }, { "epoch": 0.01, "grad_norm": 0.8998842154200234, "learning_rate": 1.953290870488323e-06, "loss": 0.7619, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.8939754429002351, "learning_rate": 1.974522292993631e-06, "loss": 0.6977, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.7267793453096345, "learning_rate": 1.9957537154989388e-06, "loss": 0.6719, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.9423699329896398, "learning_rate": 2.0169851380042466e-06, "loss": 0.6696, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.9127968272713965, "learning_rate": 2.0382165605095544e-06, "loss": 0.6863, "step": 96 }, { "epoch": 0.01, "grad_norm": 0.6453158741901237, "learning_rate": 2.0594479830148623e-06, "loss": 0.5726, "step": 97 }, { "epoch": 0.01, "grad_norm": 0.8004608999355025, "learning_rate": 2.08067940552017e-06, "loss": 0.6646, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.741422175795783, "learning_rate": 2.101910828025478e-06, "loss": 0.6141, "step": 99 }, { "epoch": 0.01, "grad_norm": 0.6185567152561202, "learning_rate": 2.1231422505307858e-06, "loss": 0.6171, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.912129457483921, "learning_rate": 2.1443736730360936e-06, "loss": 0.7509, "step": 101 }, { "epoch": 0.01, "grad_norm": 0.8247609672782598, "learning_rate": 2.1656050955414015e-06, "loss": 0.6468, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.8029055653595574, "learning_rate": 2.1868365180467093e-06, "loss": 0.7296, "step": 103 }, { "epoch": 0.01, "grad_norm": 0.9851633577727037, "learning_rate": 2.208067940552017e-06, "loss": 0.744, "step": 104 }, { "epoch": 0.01, "grad_norm": 0.6679860577610721, "learning_rate": 2.229299363057325e-06, "loss": 0.5683, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.819612351171737, "learning_rate": 2.250530785562633e-06, "loss": 0.6859, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.8845853691655635, "learning_rate": 2.2717622080679406e-06, "loss": 0.7128, "step": 107 }, { "epoch": 0.01, "grad_norm": 0.9896458089509722, "learning_rate": 2.2929936305732485e-06, "loss": 0.6823, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.8401401133737159, "learning_rate": 2.3142250530785563e-06, "loss": 0.6848, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.7044845791337637, "learning_rate": 2.335456475583864e-06, "loss": 0.6287, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.8936501983713924, "learning_rate": 2.356687898089172e-06, "loss": 0.6301, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.7977858105718107, "learning_rate": 2.3779193205944802e-06, "loss": 0.6501, "step": 112 }, { "epoch": 0.01, "grad_norm": 0.9625024469697634, "learning_rate": 2.399150743099788e-06, "loss": 0.7136, "step": 113 }, { "epoch": 0.01, "grad_norm": 0.8202496777053818, "learning_rate": 2.420382165605096e-06, "loss": 0.6466, "step": 114 }, { "epoch": 0.01, "grad_norm": 0.8083756378016473, "learning_rate": 2.4416135881104038e-06, "loss": 0.5758, "step": 115 }, { "epoch": 0.01, "grad_norm": 0.6794575896854108, "learning_rate": 2.4628450106157116e-06, "loss": 0.6271, "step": 116 }, { "epoch": 0.01, "grad_norm": 0.920475184785038, "learning_rate": 2.4840764331210194e-06, "loss": 0.6896, "step": 117 }, { "epoch": 0.02, "grad_norm": 1.0624376066131578, "learning_rate": 2.5053078556263273e-06, "loss": 0.6885, "step": 118 }, { "epoch": 0.02, "grad_norm": 1.3494780155295418, "learning_rate": 2.526539278131635e-06, "loss": 0.6805, "step": 119 }, { "epoch": 0.02, "grad_norm": 0.6680598812484428, "learning_rate": 2.547770700636943e-06, "loss": 0.6003, "step": 120 }, { "epoch": 0.02, "grad_norm": 0.7145951603724503, "learning_rate": 2.5690021231422508e-06, "loss": 0.5845, "step": 121 }, { "epoch": 0.02, "grad_norm": 0.932029401239929, "learning_rate": 2.5902335456475586e-06, "loss": 0.7106, "step": 122 }, { "epoch": 0.02, "grad_norm": 0.6711551268440451, "learning_rate": 2.6114649681528665e-06, "loss": 0.5839, "step": 123 }, { "epoch": 0.02, "grad_norm": 0.8123840964518879, "learning_rate": 2.6326963906581743e-06, "loss": 0.6535, "step": 124 }, { "epoch": 0.02, "grad_norm": 0.9502589598494059, "learning_rate": 2.653927813163482e-06, "loss": 0.6723, "step": 125 }, { "epoch": 0.02, "grad_norm": 0.686260030484047, "learning_rate": 2.67515923566879e-06, "loss": 0.5849, "step": 126 }, { "epoch": 0.02, "grad_norm": 0.671346829133467, "learning_rate": 2.696390658174098e-06, "loss": 0.5539, "step": 127 }, { "epoch": 0.02, "grad_norm": 0.7348439508382469, "learning_rate": 2.7176220806794056e-06, "loss": 0.6279, "step": 128 }, { "epoch": 0.02, "grad_norm": 0.6657544130751492, "learning_rate": 2.7388535031847135e-06, "loss": 0.5721, "step": 129 }, { "epoch": 0.02, "grad_norm": 0.7505039321902811, "learning_rate": 2.7600849256900213e-06, "loss": 0.6523, "step": 130 }, { "epoch": 0.02, "grad_norm": 0.9081812514026412, "learning_rate": 2.781316348195329e-06, "loss": 0.7182, "step": 131 }, { "epoch": 0.02, "grad_norm": 1.1754570547070735, "learning_rate": 2.802547770700637e-06, "loss": 0.7845, "step": 132 }, { "epoch": 0.02, "grad_norm": 0.759204923968475, "learning_rate": 2.823779193205945e-06, "loss": 0.5666, "step": 133 }, { "epoch": 0.02, "grad_norm": 0.9134266282272141, "learning_rate": 2.8450106157112527e-06, "loss": 0.617, "step": 134 }, { "epoch": 0.02, "grad_norm": 0.6743132459299028, "learning_rate": 2.8662420382165605e-06, "loss": 0.5926, "step": 135 }, { "epoch": 0.02, "grad_norm": 1.2852422502660135, "learning_rate": 2.8874734607218683e-06, "loss": 0.7055, "step": 136 }, { "epoch": 0.02, "grad_norm": 1.0032638917986518, "learning_rate": 2.908704883227176e-06, "loss": 0.6845, "step": 137 }, { "epoch": 0.02, "grad_norm": 0.6545452001482567, "learning_rate": 2.9299363057324844e-06, "loss": 0.5724, "step": 138 }, { "epoch": 0.02, "grad_norm": 1.0033165293069863, "learning_rate": 2.9511677282377923e-06, "loss": 0.708, "step": 139 }, { "epoch": 0.02, "grad_norm": 0.6603942673614058, "learning_rate": 2.9723991507431e-06, "loss": 0.5816, "step": 140 }, { "epoch": 0.02, "grad_norm": 0.9929599681631902, "learning_rate": 2.993630573248408e-06, "loss": 0.652, "step": 141 }, { "epoch": 0.02, "grad_norm": 0.9195381630729605, "learning_rate": 3.0148619957537158e-06, "loss": 0.6224, "step": 142 }, { "epoch": 0.02, "grad_norm": 0.96507859825706, "learning_rate": 3.0360934182590236e-06, "loss": 0.7299, "step": 143 }, { "epoch": 0.02, "grad_norm": 0.6331253476001903, "learning_rate": 3.0573248407643314e-06, "loss": 0.5364, "step": 144 }, { "epoch": 0.02, "grad_norm": 0.7321637764611932, "learning_rate": 3.0785562632696393e-06, "loss": 0.6539, "step": 145 }, { "epoch": 0.02, "grad_norm": 0.8029848201541017, "learning_rate": 3.099787685774947e-06, "loss": 0.6919, "step": 146 }, { "epoch": 0.02, "grad_norm": 0.8112265854608945, "learning_rate": 3.121019108280255e-06, "loss": 0.5885, "step": 147 }, { "epoch": 0.02, "grad_norm": 0.945927793705502, "learning_rate": 3.142250530785563e-06, "loss": 0.6859, "step": 148 }, { "epoch": 0.02, "grad_norm": 0.6415884691227447, "learning_rate": 3.1634819532908706e-06, "loss": 0.5566, "step": 149 }, { "epoch": 0.02, "grad_norm": 0.7085088857450645, "learning_rate": 3.1847133757961785e-06, "loss": 0.6582, "step": 150 }, { "epoch": 0.02, "grad_norm": 0.7086688356979257, "learning_rate": 3.2059447983014863e-06, "loss": 0.5684, "step": 151 }, { "epoch": 0.02, "grad_norm": 0.89752749128589, "learning_rate": 3.227176220806794e-06, "loss": 0.6836, "step": 152 }, { "epoch": 0.02, "grad_norm": 0.7609285062708646, "learning_rate": 3.248407643312102e-06, "loss": 0.6002, "step": 153 }, { "epoch": 0.02, "grad_norm": 0.8250009585344907, "learning_rate": 3.26963906581741e-06, "loss": 0.6999, "step": 154 }, { "epoch": 0.02, "grad_norm": 0.6295758904840901, "learning_rate": 3.2908704883227177e-06, "loss": 0.6006, "step": 155 }, { "epoch": 0.02, "grad_norm": 0.657018296450642, "learning_rate": 3.3121019108280255e-06, "loss": 0.5709, "step": 156 }, { "epoch": 0.02, "grad_norm": 0.7453449119436517, "learning_rate": 3.3333333333333333e-06, "loss": 0.6195, "step": 157 }, { "epoch": 0.02, "grad_norm": 0.6623503898673194, "learning_rate": 3.354564755838641e-06, "loss": 0.618, "step": 158 }, { "epoch": 0.02, "grad_norm": 0.8975443110926178, "learning_rate": 3.375796178343949e-06, "loss": 0.7095, "step": 159 }, { "epoch": 0.02, "grad_norm": 0.7196920125346429, "learning_rate": 3.397027600849257e-06, "loss": 0.5944, "step": 160 }, { "epoch": 0.02, "grad_norm": 0.7274047246720723, "learning_rate": 3.4182590233545647e-06, "loss": 0.6097, "step": 161 }, { "epoch": 0.02, "grad_norm": 0.8416353775608544, "learning_rate": 3.4394904458598725e-06, "loss": 0.6731, "step": 162 }, { "epoch": 0.02, "grad_norm": 0.7565544021842142, "learning_rate": 3.4607218683651803e-06, "loss": 0.5338, "step": 163 }, { "epoch": 0.02, "grad_norm": 0.8198347197807316, "learning_rate": 3.481953290870488e-06, "loss": 0.5975, "step": 164 }, { "epoch": 0.02, "grad_norm": 0.5958765236284512, "learning_rate": 3.5031847133757964e-06, "loss": 0.5593, "step": 165 }, { "epoch": 0.02, "grad_norm": 0.8596154311044288, "learning_rate": 3.5244161358811043e-06, "loss": 0.7188, "step": 166 }, { "epoch": 0.02, "grad_norm": 1.2250817147186526, "learning_rate": 3.545647558386412e-06, "loss": 0.6885, "step": 167 }, { "epoch": 0.02, "grad_norm": 0.6555194561718294, "learning_rate": 3.56687898089172e-06, "loss": 0.584, "step": 168 }, { "epoch": 0.02, "grad_norm": 0.620333165580415, "learning_rate": 3.5881104033970278e-06, "loss": 0.5509, "step": 169 }, { "epoch": 0.02, "grad_norm": 0.8073633638541113, "learning_rate": 3.6093418259023356e-06, "loss": 0.5459, "step": 170 }, { "epoch": 0.02, "grad_norm": 0.7528890153499365, "learning_rate": 3.6305732484076435e-06, "loss": 0.6095, "step": 171 }, { "epoch": 0.02, "grad_norm": 0.9357230716689366, "learning_rate": 3.6518046709129513e-06, "loss": 0.6951, "step": 172 }, { "epoch": 0.02, "grad_norm": 0.8441946942232076, "learning_rate": 3.673036093418259e-06, "loss": 0.686, "step": 173 }, { "epoch": 0.02, "grad_norm": 0.6953738393442778, "learning_rate": 3.694267515923567e-06, "loss": 0.5587, "step": 174 }, { "epoch": 0.02, "grad_norm": 1.0614940309536194, "learning_rate": 3.715498938428875e-06, "loss": 0.6437, "step": 175 }, { "epoch": 0.02, "grad_norm": 0.8910635949918921, "learning_rate": 3.7367303609341826e-06, "loss": 0.7033, "step": 176 }, { "epoch": 0.02, "grad_norm": 0.8732162877196056, "learning_rate": 3.757961783439491e-06, "loss": 0.6226, "step": 177 }, { "epoch": 0.02, "grad_norm": 0.6165029790713727, "learning_rate": 3.7791932059447987e-06, "loss": 0.5568, "step": 178 }, { "epoch": 0.02, "grad_norm": 0.7087055049069805, "learning_rate": 3.8004246284501066e-06, "loss": 0.5936, "step": 179 }, { "epoch": 0.02, "grad_norm": 0.6614937596481818, "learning_rate": 3.821656050955415e-06, "loss": 0.6021, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.7181004064853254, "learning_rate": 3.842887473460722e-06, "loss": 0.5976, "step": 181 }, { "epoch": 0.02, "grad_norm": 0.8746688456208674, "learning_rate": 3.8641188959660305e-06, "loss": 0.7057, "step": 182 }, { "epoch": 0.02, "grad_norm": 0.8168122830164994, "learning_rate": 3.885350318471338e-06, "loss": 0.6907, "step": 183 }, { "epoch": 0.02, "grad_norm": 0.7355109794661671, "learning_rate": 3.906581740976646e-06, "loss": 0.5938, "step": 184 }, { "epoch": 0.02, "grad_norm": 0.9557411818018728, "learning_rate": 3.927813163481954e-06, "loss": 0.7442, "step": 185 }, { "epoch": 0.02, "grad_norm": 0.8272193627767886, "learning_rate": 3.949044585987262e-06, "loss": 0.7387, "step": 186 }, { "epoch": 0.02, "grad_norm": 0.7601867318329757, "learning_rate": 3.970276008492569e-06, "loss": 0.5666, "step": 187 }, { "epoch": 0.02, "grad_norm": 0.7848861700574545, "learning_rate": 3.9915074309978775e-06, "loss": 0.6065, "step": 188 }, { "epoch": 0.02, "grad_norm": 0.6641571738403048, "learning_rate": 4.012738853503185e-06, "loss": 0.5761, "step": 189 }, { "epoch": 0.02, "grad_norm": 0.6167229488978831, "learning_rate": 4.033970276008493e-06, "loss": 0.6061, "step": 190 }, { "epoch": 0.02, "grad_norm": 0.9845644834250507, "learning_rate": 4.055201698513801e-06, "loss": 0.6665, "step": 191 }, { "epoch": 0.02, "grad_norm": 1.4225166467098442, "learning_rate": 4.076433121019109e-06, "loss": 0.7222, "step": 192 }, { "epoch": 0.02, "grad_norm": 0.825520396357522, "learning_rate": 4.097664543524416e-06, "loss": 0.6237, "step": 193 }, { "epoch": 0.02, "grad_norm": 0.9855794534684909, "learning_rate": 4.1188959660297246e-06, "loss": 0.7422, "step": 194 }, { "epoch": 0.02, "grad_norm": 0.8740524318615315, "learning_rate": 4.140127388535032e-06, "loss": 0.6686, "step": 195 }, { "epoch": 0.02, "grad_norm": 0.7916317776172551, "learning_rate": 4.16135881104034e-06, "loss": 0.6258, "step": 196 }, { "epoch": 0.03, "grad_norm": 0.7074471847536283, "learning_rate": 4.1825902335456485e-06, "loss": 0.6024, "step": 197 }, { "epoch": 0.03, "grad_norm": 0.7027202324237053, "learning_rate": 4.203821656050956e-06, "loss": 0.5537, "step": 198 }, { "epoch": 0.03, "grad_norm": 1.1408714976242633, "learning_rate": 4.225053078556264e-06, "loss": 0.634, "step": 199 }, { "epoch": 0.03, "grad_norm": 0.8283598588701209, "learning_rate": 4.2462845010615716e-06, "loss": 0.6226, "step": 200 }, { "epoch": 0.03, "grad_norm": 0.9313008468844602, "learning_rate": 4.26751592356688e-06, "loss": 0.7185, "step": 201 }, { "epoch": 0.03, "grad_norm": 0.8810462807052284, "learning_rate": 4.288747346072187e-06, "loss": 0.6209, "step": 202 }, { "epoch": 0.03, "grad_norm": 0.9015348702685758, "learning_rate": 4.3099787685774955e-06, "loss": 0.6665, "step": 203 }, { "epoch": 0.03, "grad_norm": 0.5941279491487401, "learning_rate": 4.331210191082803e-06, "loss": 0.5455, "step": 204 }, { "epoch": 0.03, "grad_norm": 0.6242764583731539, "learning_rate": 4.352441613588111e-06, "loss": 0.5779, "step": 205 }, { "epoch": 0.03, "grad_norm": 0.7002150286010799, "learning_rate": 4.373673036093419e-06, "loss": 0.5447, "step": 206 }, { "epoch": 0.03, "grad_norm": 0.6912798497967817, "learning_rate": 4.394904458598727e-06, "loss": 0.5988, "step": 207 }, { "epoch": 0.03, "grad_norm": 0.8690559647733447, "learning_rate": 4.416135881104034e-06, "loss": 0.6324, "step": 208 }, { "epoch": 0.03, "grad_norm": 1.0281067404352298, "learning_rate": 4.4373673036093425e-06, "loss": 0.6801, "step": 209 }, { "epoch": 0.03, "grad_norm": 0.6895297615817462, "learning_rate": 4.45859872611465e-06, "loss": 0.5542, "step": 210 }, { "epoch": 0.03, "grad_norm": 0.6504077645341889, "learning_rate": 4.479830148619958e-06, "loss": 0.5956, "step": 211 }, { "epoch": 0.03, "grad_norm": 0.8892541805800651, "learning_rate": 4.501061571125266e-06, "loss": 0.651, "step": 212 }, { "epoch": 0.03, "grad_norm": 0.7710720325367937, "learning_rate": 4.522292993630574e-06, "loss": 0.713, "step": 213 }, { "epoch": 0.03, "grad_norm": 0.675110704188669, "learning_rate": 4.543524416135881e-06, "loss": 0.5562, "step": 214 }, { "epoch": 0.03, "grad_norm": 0.7072695261962337, "learning_rate": 4.5647558386411895e-06, "loss": 0.6159, "step": 215 }, { "epoch": 0.03, "grad_norm": 0.7444640008129021, "learning_rate": 4.585987261146497e-06, "loss": 0.6267, "step": 216 }, { "epoch": 0.03, "grad_norm": 0.8671132992439867, "learning_rate": 4.607218683651805e-06, "loss": 0.6657, "step": 217 }, { "epoch": 0.03, "grad_norm": 0.7074397549705441, "learning_rate": 4.628450106157113e-06, "loss": 0.558, "step": 218 }, { "epoch": 0.03, "grad_norm": 0.7800357800835367, "learning_rate": 4.649681528662421e-06, "loss": 0.5888, "step": 219 }, { "epoch": 0.03, "grad_norm": 0.7353721538536665, "learning_rate": 4.670912951167728e-06, "loss": 0.6019, "step": 220 }, { "epoch": 0.03, "grad_norm": 0.9013761422335163, "learning_rate": 4.6921443736730366e-06, "loss": 0.636, "step": 221 }, { "epoch": 0.03, "grad_norm": 0.7469190290966377, "learning_rate": 4.713375796178344e-06, "loss": 0.5989, "step": 222 }, { "epoch": 0.03, "grad_norm": 0.7270094404967548, "learning_rate": 4.734607218683652e-06, "loss": 0.6204, "step": 223 }, { "epoch": 0.03, "grad_norm": 0.6216399989244757, "learning_rate": 4.7558386411889605e-06, "loss": 0.5971, "step": 224 }, { "epoch": 0.03, "grad_norm": 0.6066311313104265, "learning_rate": 4.777070063694268e-06, "loss": 0.6148, "step": 225 }, { "epoch": 0.03, "grad_norm": 0.5599471218785727, "learning_rate": 4.798301486199576e-06, "loss": 0.4796, "step": 226 }, { "epoch": 0.03, "grad_norm": 0.6157753345646437, "learning_rate": 4.819532908704884e-06, "loss": 0.5216, "step": 227 }, { "epoch": 0.03, "grad_norm": 0.8228195083528653, "learning_rate": 4.840764331210192e-06, "loss": 0.6043, "step": 228 }, { "epoch": 0.03, "grad_norm": 0.686772813135059, "learning_rate": 4.861995753715499e-06, "loss": 0.6483, "step": 229 }, { "epoch": 0.03, "grad_norm": 0.9872220934878595, "learning_rate": 4.8832271762208075e-06, "loss": 0.7057, "step": 230 }, { "epoch": 0.03, "grad_norm": 0.9814221220293042, "learning_rate": 4.904458598726115e-06, "loss": 0.6954, "step": 231 }, { "epoch": 0.03, "grad_norm": 0.89164011450147, "learning_rate": 4.925690021231423e-06, "loss": 0.5815, "step": 232 }, { "epoch": 0.03, "grad_norm": 0.7016484179686268, "learning_rate": 4.946921443736731e-06, "loss": 0.6257, "step": 233 }, { "epoch": 0.03, "grad_norm": 0.774515551923184, "learning_rate": 4.968152866242039e-06, "loss": 0.6715, "step": 234 }, { "epoch": 0.03, "grad_norm": 0.8137485818123509, "learning_rate": 4.989384288747346e-06, "loss": 0.6076, "step": 235 }, { "epoch": 0.03, "grad_norm": 0.7561940959299209, "learning_rate": 5.0106157112526545e-06, "loss": 0.5821, "step": 236 }, { "epoch": 0.03, "grad_norm": 0.898623103820379, "learning_rate": 5.031847133757962e-06, "loss": 0.706, "step": 237 }, { "epoch": 0.03, "grad_norm": 0.7995953466662316, "learning_rate": 5.05307855626327e-06, "loss": 0.6196, "step": 238 }, { "epoch": 0.03, "grad_norm": 0.6536283203986453, "learning_rate": 5.074309978768578e-06, "loss": 0.5753, "step": 239 }, { "epoch": 0.03, "grad_norm": 1.051355284149499, "learning_rate": 5.095541401273886e-06, "loss": 0.6036, "step": 240 }, { "epoch": 0.03, "grad_norm": 0.8991691183020971, "learning_rate": 5.116772823779193e-06, "loss": 0.6782, "step": 241 }, { "epoch": 0.03, "grad_norm": 0.6256156762749676, "learning_rate": 5.1380042462845016e-06, "loss": 0.5823, "step": 242 }, { "epoch": 0.03, "grad_norm": 0.7177094517309004, "learning_rate": 5.159235668789809e-06, "loss": 0.6223, "step": 243 }, { "epoch": 0.03, "grad_norm": 0.8711944375615034, "learning_rate": 5.180467091295117e-06, "loss": 0.6944, "step": 244 }, { "epoch": 0.03, "grad_norm": 0.8803187138200502, "learning_rate": 5.201698513800425e-06, "loss": 0.6699, "step": 245 }, { "epoch": 0.03, "grad_norm": 0.7775556410870242, "learning_rate": 5.222929936305733e-06, "loss": 0.5807, "step": 246 }, { "epoch": 0.03, "grad_norm": 0.8186463739188571, "learning_rate": 5.24416135881104e-06, "loss": 0.6677, "step": 247 }, { "epoch": 0.03, "grad_norm": 0.8494691832014075, "learning_rate": 5.265392781316349e-06, "loss": 0.7448, "step": 248 }, { "epoch": 0.03, "grad_norm": 0.7690654223032175, "learning_rate": 5.286624203821657e-06, "loss": 0.5999, "step": 249 }, { "epoch": 0.03, "grad_norm": 0.6041357286634986, "learning_rate": 5.307855626326964e-06, "loss": 0.55, "step": 250 }, { "epoch": 0.03, "grad_norm": 0.6227306599717993, "learning_rate": 5.3290870488322725e-06, "loss": 0.5158, "step": 251 }, { "epoch": 0.03, "grad_norm": 0.7241136824678885, "learning_rate": 5.35031847133758e-06, "loss": 0.6279, "step": 252 }, { "epoch": 0.03, "grad_norm": 0.6810033718421417, "learning_rate": 5.371549893842888e-06, "loss": 0.5917, "step": 253 }, { "epoch": 0.03, "grad_norm": 0.8444944767654462, "learning_rate": 5.392781316348196e-06, "loss": 0.6579, "step": 254 }, { "epoch": 0.03, "grad_norm": 0.7122852417532933, "learning_rate": 5.414012738853504e-06, "loss": 0.5785, "step": 255 }, { "epoch": 0.03, "grad_norm": 0.6718345646223689, "learning_rate": 5.435244161358811e-06, "loss": 0.5623, "step": 256 }, { "epoch": 0.03, "grad_norm": 0.8559619562534222, "learning_rate": 5.4564755838641195e-06, "loss": 0.6424, "step": 257 }, { "epoch": 0.03, "grad_norm": 1.9897427075245315, "learning_rate": 5.477707006369427e-06, "loss": 0.6599, "step": 258 }, { "epoch": 0.03, "grad_norm": 0.7495990121048346, "learning_rate": 5.498938428874735e-06, "loss": 0.7048, "step": 259 }, { "epoch": 0.03, "grad_norm": 0.7617635842160821, "learning_rate": 5.520169851380043e-06, "loss": 0.6453, "step": 260 }, { "epoch": 0.03, "grad_norm": 0.9923681855393078, "learning_rate": 5.541401273885351e-06, "loss": 0.7383, "step": 261 }, { "epoch": 0.03, "grad_norm": 0.8222305195153744, "learning_rate": 5.562632696390658e-06, "loss": 0.6413, "step": 262 }, { "epoch": 0.03, "grad_norm": 0.6626699551761589, "learning_rate": 5.5838641188959666e-06, "loss": 0.5702, "step": 263 }, { "epoch": 0.03, "grad_norm": 0.8119419623711125, "learning_rate": 5.605095541401274e-06, "loss": 0.6073, "step": 264 }, { "epoch": 0.03, "grad_norm": 0.7647961887972264, "learning_rate": 5.626326963906582e-06, "loss": 0.5806, "step": 265 }, { "epoch": 0.03, "grad_norm": 0.6166367579382988, "learning_rate": 5.64755838641189e-06, "loss": 0.5656, "step": 266 }, { "epoch": 0.03, "grad_norm": 0.799875375532442, "learning_rate": 5.668789808917198e-06, "loss": 0.649, "step": 267 }, { "epoch": 0.03, "grad_norm": 0.7275526056631297, "learning_rate": 5.690021231422505e-06, "loss": 0.6317, "step": 268 }, { "epoch": 0.03, "grad_norm": 0.6472681387536379, "learning_rate": 5.7112526539278136e-06, "loss": 0.5783, "step": 269 }, { "epoch": 0.03, "grad_norm": 0.637531562482795, "learning_rate": 5.732484076433121e-06, "loss": 0.5846, "step": 270 }, { "epoch": 0.03, "grad_norm": 0.7397922736008121, "learning_rate": 5.753715498938429e-06, "loss": 0.6684, "step": 271 }, { "epoch": 0.03, "grad_norm": 1.0205806869608305, "learning_rate": 5.774946921443737e-06, "loss": 0.6707, "step": 272 }, { "epoch": 0.03, "grad_norm": 0.8699718878359272, "learning_rate": 5.796178343949045e-06, "loss": 0.6475, "step": 273 }, { "epoch": 0.03, "grad_norm": 0.6690727819694017, "learning_rate": 5.817409766454352e-06, "loss": 0.6253, "step": 274 }, { "epoch": 0.04, "grad_norm": 0.8550115033966339, "learning_rate": 5.838641188959661e-06, "loss": 0.6021, "step": 275 }, { "epoch": 0.04, "grad_norm": 0.8652339155907169, "learning_rate": 5.859872611464969e-06, "loss": 0.6385, "step": 276 }, { "epoch": 0.04, "grad_norm": 0.8573757882631059, "learning_rate": 5.881104033970276e-06, "loss": 0.5985, "step": 277 }, { "epoch": 0.04, "grad_norm": 0.7481145556491249, "learning_rate": 5.9023354564755845e-06, "loss": 0.5614, "step": 278 }, { "epoch": 0.04, "grad_norm": 0.7603850815803599, "learning_rate": 5.923566878980892e-06, "loss": 0.5895, "step": 279 }, { "epoch": 0.04, "grad_norm": 0.7106234283638302, "learning_rate": 5.9447983014862e-06, "loss": 0.5841, "step": 280 }, { "epoch": 0.04, "grad_norm": 0.7571796686983795, "learning_rate": 5.966029723991508e-06, "loss": 0.5725, "step": 281 }, { "epoch": 0.04, "grad_norm": 0.6563732440289131, "learning_rate": 5.987261146496816e-06, "loss": 0.6088, "step": 282 }, { "epoch": 0.04, "grad_norm": 0.6977442128338657, "learning_rate": 6.008492569002123e-06, "loss": 0.5594, "step": 283 }, { "epoch": 0.04, "grad_norm": 0.706010792363827, "learning_rate": 6.0297239915074315e-06, "loss": 0.5695, "step": 284 }, { "epoch": 0.04, "grad_norm": 0.859176506136312, "learning_rate": 6.050955414012739e-06, "loss": 0.667, "step": 285 }, { "epoch": 0.04, "grad_norm": 0.8105147566645884, "learning_rate": 6.072186836518047e-06, "loss": 0.632, "step": 286 }, { "epoch": 0.04, "grad_norm": 0.8209217482786072, "learning_rate": 6.093418259023355e-06, "loss": 0.6366, "step": 287 }, { "epoch": 0.04, "grad_norm": 0.7608301811315319, "learning_rate": 6.114649681528663e-06, "loss": 0.5933, "step": 288 }, { "epoch": 0.04, "grad_norm": 0.7654007541474549, "learning_rate": 6.13588110403397e-06, "loss": 0.6515, "step": 289 }, { "epoch": 0.04, "grad_norm": 0.629612742923365, "learning_rate": 6.1571125265392786e-06, "loss": 0.571, "step": 290 }, { "epoch": 0.04, "grad_norm": 0.7279126481255758, "learning_rate": 6.178343949044586e-06, "loss": 0.6631, "step": 291 }, { "epoch": 0.04, "grad_norm": 0.6004298020074539, "learning_rate": 6.199575371549894e-06, "loss": 0.5499, "step": 292 }, { "epoch": 0.04, "grad_norm": 0.6887789599745987, "learning_rate": 6.220806794055202e-06, "loss": 0.6049, "step": 293 }, { "epoch": 0.04, "grad_norm": 0.7972200861030105, "learning_rate": 6.24203821656051e-06, "loss": 0.6428, "step": 294 }, { "epoch": 0.04, "grad_norm": 0.7465288393939438, "learning_rate": 6.263269639065817e-06, "loss": 0.587, "step": 295 }, { "epoch": 0.04, "grad_norm": 1.0574613089275766, "learning_rate": 6.284501061571126e-06, "loss": 0.6344, "step": 296 }, { "epoch": 0.04, "grad_norm": 0.9460877485374782, "learning_rate": 6.305732484076433e-06, "loss": 0.6436, "step": 297 }, { "epoch": 0.04, "grad_norm": 0.8184763277187621, "learning_rate": 6.326963906581741e-06, "loss": 0.7271, "step": 298 }, { "epoch": 0.04, "grad_norm": 0.6930575103378493, "learning_rate": 6.348195329087049e-06, "loss": 0.6041, "step": 299 }, { "epoch": 0.04, "grad_norm": 0.7531353169689462, "learning_rate": 6.369426751592357e-06, "loss": 0.5961, "step": 300 }, { "epoch": 0.04, "grad_norm": 0.7100085841084717, "learning_rate": 6.390658174097664e-06, "loss": 0.5595, "step": 301 }, { "epoch": 0.04, "grad_norm": 0.9048480632880608, "learning_rate": 6.411889596602973e-06, "loss": 0.6511, "step": 302 }, { "epoch": 0.04, "grad_norm": 0.6880597100842921, "learning_rate": 6.433121019108281e-06, "loss": 0.5716, "step": 303 }, { "epoch": 0.04, "grad_norm": 0.8482267906196166, "learning_rate": 6.454352441613588e-06, "loss": 0.6337, "step": 304 }, { "epoch": 0.04, "grad_norm": 0.7599383356399694, "learning_rate": 6.4755838641188965e-06, "loss": 0.5896, "step": 305 }, { "epoch": 0.04, "grad_norm": 0.6976717143745537, "learning_rate": 6.496815286624204e-06, "loss": 0.5877, "step": 306 }, { "epoch": 0.04, "grad_norm": 0.9891232937813345, "learning_rate": 6.518046709129512e-06, "loss": 0.741, "step": 307 }, { "epoch": 0.04, "grad_norm": 0.7442534546756299, "learning_rate": 6.53927813163482e-06, "loss": 0.6178, "step": 308 }, { "epoch": 0.04, "grad_norm": 0.7970249741724695, "learning_rate": 6.560509554140128e-06, "loss": 0.5939, "step": 309 }, { "epoch": 0.04, "grad_norm": 0.8354095101493896, "learning_rate": 6.581740976645435e-06, "loss": 0.7177, "step": 310 }, { "epoch": 0.04, "grad_norm": 0.6420658221124799, "learning_rate": 6.6029723991507436e-06, "loss": 0.5191, "step": 311 }, { "epoch": 0.04, "grad_norm": 0.7228803925846446, "learning_rate": 6.624203821656051e-06, "loss": 0.5701, "step": 312 }, { "epoch": 0.04, "grad_norm": 0.7886061379417076, "learning_rate": 6.645435244161359e-06, "loss": 0.6825, "step": 313 }, { "epoch": 0.04, "grad_norm": 0.6494021019881504, "learning_rate": 6.666666666666667e-06, "loss": 0.6025, "step": 314 }, { "epoch": 0.04, "grad_norm": 1.064902467884702, "learning_rate": 6.687898089171975e-06, "loss": 0.7276, "step": 315 }, { "epoch": 0.04, "grad_norm": 0.7076045071300145, "learning_rate": 6.709129511677282e-06, "loss": 0.5927, "step": 316 }, { "epoch": 0.04, "grad_norm": 0.6516034356887127, "learning_rate": 6.730360934182591e-06, "loss": 0.5837, "step": 317 }, { "epoch": 0.04, "grad_norm": 0.8532912970246789, "learning_rate": 6.751592356687898e-06, "loss": 0.6974, "step": 318 }, { "epoch": 0.04, "grad_norm": 0.8136814955252396, "learning_rate": 6.772823779193206e-06, "loss": 0.6814, "step": 319 }, { "epoch": 0.04, "grad_norm": 0.7694784076036806, "learning_rate": 6.794055201698514e-06, "loss": 0.6535, "step": 320 }, { "epoch": 0.04, "grad_norm": 0.6080238914599104, "learning_rate": 6.815286624203822e-06, "loss": 0.57, "step": 321 }, { "epoch": 0.04, "grad_norm": 0.8143841412568615, "learning_rate": 6.836518046709129e-06, "loss": 0.5867, "step": 322 }, { "epoch": 0.04, "grad_norm": 0.9020756728287854, "learning_rate": 6.857749469214438e-06, "loss": 0.6342, "step": 323 }, { "epoch": 0.04, "grad_norm": 0.9170186790597973, "learning_rate": 6.878980891719745e-06, "loss": 0.701, "step": 324 }, { "epoch": 0.04, "grad_norm": 0.6878753443589771, "learning_rate": 6.900212314225053e-06, "loss": 0.5742, "step": 325 }, { "epoch": 0.04, "grad_norm": 0.6721737113608273, "learning_rate": 6.921443736730361e-06, "loss": 0.5557, "step": 326 }, { "epoch": 0.04, "grad_norm": 0.6550388119556643, "learning_rate": 6.942675159235669e-06, "loss": 0.6052, "step": 327 }, { "epoch": 0.04, "grad_norm": 0.7399479184989896, "learning_rate": 6.963906581740976e-06, "loss": 0.6463, "step": 328 }, { "epoch": 0.04, "grad_norm": 0.7156549908824404, "learning_rate": 6.985138004246285e-06, "loss": 0.5728, "step": 329 }, { "epoch": 0.04, "grad_norm": 0.6718801324294158, "learning_rate": 7.006369426751593e-06, "loss": 0.5629, "step": 330 }, { "epoch": 0.04, "grad_norm": 0.7009074851724983, "learning_rate": 7.0276008492569e-06, "loss": 0.5532, "step": 331 }, { "epoch": 0.04, "grad_norm": 0.8029408411971348, "learning_rate": 7.0488322717622086e-06, "loss": 0.5271, "step": 332 }, { "epoch": 0.04, "grad_norm": 1.0431012036211642, "learning_rate": 7.070063694267516e-06, "loss": 0.6367, "step": 333 }, { "epoch": 0.04, "grad_norm": 0.8093878262347405, "learning_rate": 7.091295116772824e-06, "loss": 0.6403, "step": 334 }, { "epoch": 0.04, "grad_norm": 1.2908023804812134, "learning_rate": 7.112526539278132e-06, "loss": 0.7034, "step": 335 }, { "epoch": 0.04, "grad_norm": 1.0398537291927146, "learning_rate": 7.13375796178344e-06, "loss": 0.6809, "step": 336 }, { "epoch": 0.04, "grad_norm": 1.0350876345922804, "learning_rate": 7.154989384288747e-06, "loss": 0.6931, "step": 337 }, { "epoch": 0.04, "grad_norm": 0.7886373173499229, "learning_rate": 7.1762208067940556e-06, "loss": 0.6589, "step": 338 }, { "epoch": 0.04, "grad_norm": 0.8927862733820032, "learning_rate": 7.197452229299363e-06, "loss": 0.6935, "step": 339 }, { "epoch": 0.04, "grad_norm": 0.8259408573357077, "learning_rate": 7.218683651804671e-06, "loss": 0.6376, "step": 340 }, { "epoch": 0.04, "grad_norm": 0.7824414331887, "learning_rate": 7.239915074309979e-06, "loss": 0.5965, "step": 341 }, { "epoch": 0.04, "grad_norm": 0.6088715758221711, "learning_rate": 7.261146496815287e-06, "loss": 0.5885, "step": 342 }, { "epoch": 0.04, "grad_norm": 1.0709181495605602, "learning_rate": 7.282377919320594e-06, "loss": 0.6809, "step": 343 }, { "epoch": 0.04, "grad_norm": 0.8043326912255777, "learning_rate": 7.303609341825903e-06, "loss": 0.6297, "step": 344 }, { "epoch": 0.04, "grad_norm": 0.8565291208373391, "learning_rate": 7.32484076433121e-06, "loss": 0.6546, "step": 345 }, { "epoch": 0.04, "grad_norm": 0.624084247670802, "learning_rate": 7.346072186836518e-06, "loss": 0.5563, "step": 346 }, { "epoch": 0.04, "grad_norm": 0.7824732109326937, "learning_rate": 7.367303609341826e-06, "loss": 0.6785, "step": 347 }, { "epoch": 0.04, "grad_norm": 0.8399482169207614, "learning_rate": 7.388535031847134e-06, "loss": 0.5974, "step": 348 }, { "epoch": 0.04, "grad_norm": 0.785521414538481, "learning_rate": 7.409766454352441e-06, "loss": 0.6213, "step": 349 }, { "epoch": 0.04, "grad_norm": 0.8548704922581076, "learning_rate": 7.43099787685775e-06, "loss": 0.6807, "step": 350 }, { "epoch": 0.04, "grad_norm": 0.7805367105203607, "learning_rate": 7.452229299363057e-06, "loss": 0.6876, "step": 351 }, { "epoch": 0.04, "grad_norm": 0.7911626509097884, "learning_rate": 7.473460721868365e-06, "loss": 0.6201, "step": 352 }, { "epoch": 0.04, "grad_norm": 1.9263847215601537, "learning_rate": 7.494692144373673e-06, "loss": 0.6726, "step": 353 }, { "epoch": 0.05, "grad_norm": 0.773292037382418, "learning_rate": 7.515923566878982e-06, "loss": 0.6163, "step": 354 }, { "epoch": 0.05, "grad_norm": 0.8488440940786954, "learning_rate": 7.53715498938429e-06, "loss": 0.5657, "step": 355 }, { "epoch": 0.05, "grad_norm": 0.8985238040159557, "learning_rate": 7.5583864118895975e-06, "loss": 0.6968, "step": 356 }, { "epoch": 0.05, "grad_norm": 0.7376906256571085, "learning_rate": 7.579617834394906e-06, "loss": 0.5766, "step": 357 }, { "epoch": 0.05, "grad_norm": 0.6623660432305776, "learning_rate": 7.600849256900213e-06, "loss": 0.5424, "step": 358 }, { "epoch": 0.05, "grad_norm": 0.7753384538437651, "learning_rate": 7.622080679405521e-06, "loss": 0.5261, "step": 359 }, { "epoch": 0.05, "grad_norm": 0.7636721584433728, "learning_rate": 7.64331210191083e-06, "loss": 0.5604, "step": 360 }, { "epoch": 0.05, "grad_norm": 0.807227120074653, "learning_rate": 7.664543524416136e-06, "loss": 0.6237, "step": 361 }, { "epoch": 0.05, "grad_norm": 0.7202077397488372, "learning_rate": 7.685774946921445e-06, "loss": 0.5967, "step": 362 }, { "epoch": 0.05, "grad_norm": 0.9238434706684684, "learning_rate": 7.707006369426753e-06, "loss": 0.6284, "step": 363 }, { "epoch": 0.05, "grad_norm": 0.6904290880218553, "learning_rate": 7.728237791932061e-06, "loss": 0.5772, "step": 364 }, { "epoch": 0.05, "grad_norm": 0.6759959514969673, "learning_rate": 7.74946921443737e-06, "loss": 0.5675, "step": 365 }, { "epoch": 0.05, "grad_norm": 1.575640820985936, "learning_rate": 7.770700636942676e-06, "loss": 0.6411, "step": 366 }, { "epoch": 0.05, "grad_norm": 0.976655319737452, "learning_rate": 7.791932059447984e-06, "loss": 0.7161, "step": 367 }, { "epoch": 0.05, "grad_norm": 0.6794250834654887, "learning_rate": 7.813163481953292e-06, "loss": 0.5621, "step": 368 }, { "epoch": 0.05, "grad_norm": 0.8958077513340039, "learning_rate": 7.8343949044586e-06, "loss": 0.6572, "step": 369 }, { "epoch": 0.05, "grad_norm": 0.7730165050550669, "learning_rate": 7.855626326963907e-06, "loss": 0.6202, "step": 370 }, { "epoch": 0.05, "grad_norm": 0.744079962426877, "learning_rate": 7.876857749469215e-06, "loss": 0.6058, "step": 371 }, { "epoch": 0.05, "grad_norm": 0.8540704691717359, "learning_rate": 7.898089171974524e-06, "loss": 0.5719, "step": 372 }, { "epoch": 0.05, "grad_norm": 0.8631284956302128, "learning_rate": 7.919320594479832e-06, "loss": 0.6008, "step": 373 }, { "epoch": 0.05, "grad_norm": 0.8642598382843337, "learning_rate": 7.940552016985139e-06, "loss": 0.614, "step": 374 }, { "epoch": 0.05, "grad_norm": 0.7439967365258471, "learning_rate": 7.961783439490447e-06, "loss": 0.5856, "step": 375 }, { "epoch": 0.05, "grad_norm": 0.6795607401111311, "learning_rate": 7.983014861995755e-06, "loss": 0.5841, "step": 376 }, { "epoch": 0.05, "grad_norm": 0.7969899454925713, "learning_rate": 8.004246284501063e-06, "loss": 0.6461, "step": 377 }, { "epoch": 0.05, "grad_norm": 0.975299506666238, "learning_rate": 8.02547770700637e-06, "loss": 0.6425, "step": 378 }, { "epoch": 0.05, "grad_norm": 0.8174766590854395, "learning_rate": 8.046709129511678e-06, "loss": 0.6229, "step": 379 }, { "epoch": 0.05, "grad_norm": 0.6061374519640897, "learning_rate": 8.067940552016986e-06, "loss": 0.5521, "step": 380 }, { "epoch": 0.05, "grad_norm": 1.2087893068940179, "learning_rate": 8.089171974522295e-06, "loss": 0.7737, "step": 381 }, { "epoch": 0.05, "grad_norm": 0.8486748058122714, "learning_rate": 8.110403397027601e-06, "loss": 0.5821, "step": 382 }, { "epoch": 0.05, "grad_norm": 0.6690153487406375, "learning_rate": 8.13163481953291e-06, "loss": 0.5573, "step": 383 }, { "epoch": 0.05, "grad_norm": 0.8056415402086693, "learning_rate": 8.152866242038218e-06, "loss": 0.695, "step": 384 }, { "epoch": 0.05, "grad_norm": 0.7857803059268238, "learning_rate": 8.174097664543526e-06, "loss": 0.6159, "step": 385 }, { "epoch": 0.05, "grad_norm": 5.022417337666467, "learning_rate": 8.195329087048833e-06, "loss": 0.6367, "step": 386 }, { "epoch": 0.05, "grad_norm": 0.886779803282784, "learning_rate": 8.21656050955414e-06, "loss": 0.7315, "step": 387 }, { "epoch": 0.05, "grad_norm": 0.6185410245742909, "learning_rate": 8.237791932059449e-06, "loss": 0.5285, "step": 388 }, { "epoch": 0.05, "grad_norm": 0.6555034141382942, "learning_rate": 8.259023354564757e-06, "loss": 0.5407, "step": 389 }, { "epoch": 0.05, "grad_norm": 0.927509309026582, "learning_rate": 8.280254777070064e-06, "loss": 0.6634, "step": 390 }, { "epoch": 0.05, "grad_norm": 0.8900249257788111, "learning_rate": 8.301486199575372e-06, "loss": 0.6216, "step": 391 }, { "epoch": 0.05, "grad_norm": 0.8231308283868264, "learning_rate": 8.32271762208068e-06, "loss": 0.6526, "step": 392 }, { "epoch": 0.05, "grad_norm": 0.7226958174541385, "learning_rate": 8.343949044585989e-06, "loss": 0.6514, "step": 393 }, { "epoch": 0.05, "grad_norm": 0.6441010227146757, "learning_rate": 8.365180467091297e-06, "loss": 0.6123, "step": 394 }, { "epoch": 0.05, "grad_norm": 0.7184500228931993, "learning_rate": 8.386411889596604e-06, "loss": 0.6132, "step": 395 }, { "epoch": 0.05, "grad_norm": 0.7611063791793172, "learning_rate": 8.407643312101912e-06, "loss": 0.6107, "step": 396 }, { "epoch": 0.05, "grad_norm": 0.8854254361174873, "learning_rate": 8.42887473460722e-06, "loss": 0.6482, "step": 397 }, { "epoch": 0.05, "grad_norm": 0.7142906085084705, "learning_rate": 8.450106157112528e-06, "loss": 0.5991, "step": 398 }, { "epoch": 0.05, "grad_norm": 0.7452467444618182, "learning_rate": 8.471337579617835e-06, "loss": 0.5906, "step": 399 }, { "epoch": 0.05, "grad_norm": 0.88893485363304, "learning_rate": 8.492569002123143e-06, "loss": 0.6391, "step": 400 }, { "epoch": 0.05, "grad_norm": 0.7797099055102723, "learning_rate": 8.513800424628451e-06, "loss": 0.6288, "step": 401 }, { "epoch": 0.05, "grad_norm": 1.1476881331334823, "learning_rate": 8.53503184713376e-06, "loss": 0.6295, "step": 402 }, { "epoch": 0.05, "grad_norm": 0.6454433387905704, "learning_rate": 8.556263269639066e-06, "loss": 0.5851, "step": 403 }, { "epoch": 0.05, "grad_norm": 0.9202915443187007, "learning_rate": 8.577494692144374e-06, "loss": 0.6669, "step": 404 }, { "epoch": 0.05, "grad_norm": 0.7896740571157151, "learning_rate": 8.598726114649683e-06, "loss": 0.6247, "step": 405 }, { "epoch": 0.05, "grad_norm": 0.650371840959321, "learning_rate": 8.619957537154991e-06, "loss": 0.5711, "step": 406 }, { "epoch": 0.05, "grad_norm": 1.2264582688372754, "learning_rate": 8.641188959660298e-06, "loss": 0.6571, "step": 407 }, { "epoch": 0.05, "grad_norm": 0.7511960497743809, "learning_rate": 8.662420382165606e-06, "loss": 0.5488, "step": 408 }, { "epoch": 0.05, "grad_norm": 0.8883132225129745, "learning_rate": 8.683651804670914e-06, "loss": 0.6498, "step": 409 }, { "epoch": 0.05, "grad_norm": 0.8711475329433065, "learning_rate": 8.704883227176222e-06, "loss": 0.633, "step": 410 }, { "epoch": 0.05, "grad_norm": 0.6482577969800928, "learning_rate": 8.726114649681529e-06, "loss": 0.5435, "step": 411 }, { "epoch": 0.05, "grad_norm": 0.833716792141639, "learning_rate": 8.747346072186837e-06, "loss": 0.5639, "step": 412 }, { "epoch": 0.05, "grad_norm": 0.8404773082943031, "learning_rate": 8.768577494692145e-06, "loss": 0.6407, "step": 413 }, { "epoch": 0.05, "grad_norm": 1.003279144187182, "learning_rate": 8.789808917197454e-06, "loss": 0.6892, "step": 414 }, { "epoch": 0.05, "grad_norm": 0.702753050046326, "learning_rate": 8.81104033970276e-06, "loss": 0.5622, "step": 415 }, { "epoch": 0.05, "grad_norm": 0.7055678953855339, "learning_rate": 8.832271762208069e-06, "loss": 0.573, "step": 416 }, { "epoch": 0.05, "grad_norm": 0.7195947029496707, "learning_rate": 8.853503184713377e-06, "loss": 0.61, "step": 417 }, { "epoch": 0.05, "grad_norm": 0.7367726612748673, "learning_rate": 8.874734607218685e-06, "loss": 0.5678, "step": 418 }, { "epoch": 0.05, "grad_norm": 0.7274607829878597, "learning_rate": 8.895966029723993e-06, "loss": 0.529, "step": 419 }, { "epoch": 0.05, "grad_norm": 0.7830343262063172, "learning_rate": 8.9171974522293e-06, "loss": 0.6487, "step": 420 }, { "epoch": 0.05, "grad_norm": 0.8795400014954516, "learning_rate": 8.938428874734608e-06, "loss": 0.6744, "step": 421 }, { "epoch": 0.05, "grad_norm": 0.8163787645669263, "learning_rate": 8.959660297239916e-06, "loss": 0.6919, "step": 422 }, { "epoch": 0.05, "grad_norm": 0.6131252077612136, "learning_rate": 8.980891719745225e-06, "loss": 0.539, "step": 423 }, { "epoch": 0.05, "grad_norm": 0.6577080763662211, "learning_rate": 9.002123142250531e-06, "loss": 0.5362, "step": 424 }, { "epoch": 0.05, "grad_norm": 0.6754781830302256, "learning_rate": 9.02335456475584e-06, "loss": 0.618, "step": 425 }, { "epoch": 0.05, "grad_norm": 0.6180744659367413, "learning_rate": 9.044585987261148e-06, "loss": 0.5789, "step": 426 }, { "epoch": 0.05, "grad_norm": 0.7656745343043879, "learning_rate": 9.065817409766456e-06, "loss": 0.58, "step": 427 }, { "epoch": 0.05, "grad_norm": 0.8047908307660389, "learning_rate": 9.087048832271763e-06, "loss": 0.6189, "step": 428 }, { "epoch": 0.05, "grad_norm": 1.1378676066470146, "learning_rate": 9.10828025477707e-06, "loss": 0.6134, "step": 429 }, { "epoch": 0.05, "grad_norm": 0.7408459076539501, "learning_rate": 9.129511677282379e-06, "loss": 0.6018, "step": 430 }, { "epoch": 0.05, "grad_norm": 0.8835243577619359, "learning_rate": 9.150743099787687e-06, "loss": 0.6269, "step": 431 }, { "epoch": 0.06, "grad_norm": 0.6478236226856178, "learning_rate": 9.171974522292994e-06, "loss": 0.5543, "step": 432 }, { "epoch": 0.06, "grad_norm": 0.8829938350860344, "learning_rate": 9.193205944798302e-06, "loss": 0.6712, "step": 433 }, { "epoch": 0.06, "grad_norm": 0.8020701009095713, "learning_rate": 9.21443736730361e-06, "loss": 0.5648, "step": 434 }, { "epoch": 0.06, "grad_norm": 0.8097751212747626, "learning_rate": 9.235668789808919e-06, "loss": 0.6388, "step": 435 }, { "epoch": 0.06, "grad_norm": 0.7264245763854907, "learning_rate": 9.256900212314225e-06, "loss": 0.5783, "step": 436 }, { "epoch": 0.06, "grad_norm": 0.6658669676284141, "learning_rate": 9.278131634819534e-06, "loss": 0.5424, "step": 437 }, { "epoch": 0.06, "grad_norm": 0.7293560413896242, "learning_rate": 9.299363057324842e-06, "loss": 0.5849, "step": 438 }, { "epoch": 0.06, "grad_norm": 0.9165722911894904, "learning_rate": 9.32059447983015e-06, "loss": 0.6842, "step": 439 }, { "epoch": 0.06, "grad_norm": 0.6672157004325562, "learning_rate": 9.341825902335457e-06, "loss": 0.612, "step": 440 }, { "epoch": 0.06, "grad_norm": 1.515072527151968, "learning_rate": 9.363057324840765e-06, "loss": 0.5953, "step": 441 }, { "epoch": 0.06, "grad_norm": 0.8466680398556095, "learning_rate": 9.384288747346073e-06, "loss": 0.6494, "step": 442 }, { "epoch": 0.06, "grad_norm": 0.76149805712936, "learning_rate": 9.405520169851381e-06, "loss": 0.6328, "step": 443 }, { "epoch": 0.06, "grad_norm": 0.667792369022887, "learning_rate": 9.426751592356688e-06, "loss": 0.5881, "step": 444 }, { "epoch": 0.06, "grad_norm": 1.1000601752879977, "learning_rate": 9.447983014861996e-06, "loss": 0.5828, "step": 445 }, { "epoch": 0.06, "grad_norm": 0.7977541069806383, "learning_rate": 9.469214437367304e-06, "loss": 0.6257, "step": 446 }, { "epoch": 0.06, "grad_norm": 0.7554239986005157, "learning_rate": 9.490445859872613e-06, "loss": 0.5868, "step": 447 }, { "epoch": 0.06, "grad_norm": 0.6902154989570397, "learning_rate": 9.511677282377921e-06, "loss": 0.5741, "step": 448 }, { "epoch": 0.06, "grad_norm": 0.831785734821144, "learning_rate": 9.532908704883228e-06, "loss": 0.6632, "step": 449 }, { "epoch": 0.06, "grad_norm": 0.6589235504467976, "learning_rate": 9.554140127388536e-06, "loss": 0.5661, "step": 450 }, { "epoch": 0.06, "grad_norm": 0.6117670949273154, "learning_rate": 9.575371549893844e-06, "loss": 0.5249, "step": 451 }, { "epoch": 0.06, "grad_norm": 0.704911300521356, "learning_rate": 9.596602972399152e-06, "loss": 0.5271, "step": 452 }, { "epoch": 0.06, "grad_norm": 0.8596611607511858, "learning_rate": 9.617834394904459e-06, "loss": 0.6802, "step": 453 }, { "epoch": 0.06, "grad_norm": 0.8965616025174299, "learning_rate": 9.639065817409767e-06, "loss": 0.6273, "step": 454 }, { "epoch": 0.06, "grad_norm": 0.6109748675543724, "learning_rate": 9.660297239915075e-06, "loss": 0.6023, "step": 455 }, { "epoch": 0.06, "grad_norm": 0.7991002535309961, "learning_rate": 9.681528662420384e-06, "loss": 0.6369, "step": 456 }, { "epoch": 0.06, "grad_norm": 0.7452014876614284, "learning_rate": 9.70276008492569e-06, "loss": 0.5368, "step": 457 }, { "epoch": 0.06, "grad_norm": 0.7116054497263848, "learning_rate": 9.723991507430999e-06, "loss": 0.647, "step": 458 }, { "epoch": 0.06, "grad_norm": 0.7733574624574389, "learning_rate": 9.745222929936307e-06, "loss": 0.5834, "step": 459 }, { "epoch": 0.06, "grad_norm": 0.8673038237405521, "learning_rate": 9.766454352441615e-06, "loss": 0.6557, "step": 460 }, { "epoch": 0.06, "grad_norm": 0.9087978224410032, "learning_rate": 9.787685774946922e-06, "loss": 0.6369, "step": 461 }, { "epoch": 0.06, "grad_norm": 0.7248864196673108, "learning_rate": 9.80891719745223e-06, "loss": 0.5998, "step": 462 }, { "epoch": 0.06, "grad_norm": 0.762858181858747, "learning_rate": 9.830148619957538e-06, "loss": 0.6096, "step": 463 }, { "epoch": 0.06, "grad_norm": 0.8189284828454165, "learning_rate": 9.851380042462846e-06, "loss": 0.6268, "step": 464 }, { "epoch": 0.06, "grad_norm": 1.2234999520903838, "learning_rate": 9.872611464968153e-06, "loss": 0.6039, "step": 465 }, { "epoch": 0.06, "grad_norm": 0.6611390379618824, "learning_rate": 9.893842887473461e-06, "loss": 0.5561, "step": 466 }, { "epoch": 0.06, "grad_norm": 0.7861456650459644, "learning_rate": 9.91507430997877e-06, "loss": 0.6825, "step": 467 }, { "epoch": 0.06, "grad_norm": 0.7232391983502742, "learning_rate": 9.936305732484078e-06, "loss": 0.5668, "step": 468 }, { "epoch": 0.06, "grad_norm": 0.8245054175555814, "learning_rate": 9.957537154989384e-06, "loss": 0.6465, "step": 469 }, { "epoch": 0.06, "grad_norm": 0.8159840462089913, "learning_rate": 9.978768577494693e-06, "loss": 0.623, "step": 470 }, { "epoch": 0.06, "grad_norm": 0.6739818639719175, "learning_rate": 1e-05, "loss": 0.5829, "step": 471 }, { "epoch": 0.06, "grad_norm": 0.9169394853492169, "learning_rate": 9.99999989358299e-06, "loss": 0.6543, "step": 472 }, { "epoch": 0.06, "grad_norm": 0.732398277025336, "learning_rate": 9.99999957433196e-06, "loss": 0.604, "step": 473 }, { "epoch": 0.06, "grad_norm": 0.7935352329835511, "learning_rate": 9.999999042246928e-06, "loss": 0.6036, "step": 474 }, { "epoch": 0.06, "grad_norm": 0.7204735579547549, "learning_rate": 9.999998297327913e-06, "loss": 0.567, "step": 475 }, { "epoch": 0.06, "grad_norm": 0.9466278469784851, "learning_rate": 9.999997339574949e-06, "loss": 0.6903, "step": 476 }, { "epoch": 0.06, "grad_norm": 0.7510980657849371, "learning_rate": 9.999996168988075e-06, "loss": 0.5798, "step": 477 }, { "epoch": 0.06, "grad_norm": 0.9251013947903287, "learning_rate": 9.999994785567344e-06, "loss": 0.6208, "step": 478 }, { "epoch": 0.06, "grad_norm": 0.8823297094976791, "learning_rate": 9.99999318931281e-06, "loss": 0.6067, "step": 479 }, { "epoch": 0.06, "grad_norm": 0.8889253143015425, "learning_rate": 9.999991380224545e-06, "loss": 0.6672, "step": 480 }, { "epoch": 0.06, "grad_norm": 0.8224626528968296, "learning_rate": 9.999989358302623e-06, "loss": 0.6605, "step": 481 }, { "epoch": 0.06, "grad_norm": 0.5962750987251436, "learning_rate": 9.999987123547133e-06, "loss": 0.5454, "step": 482 }, { "epoch": 0.06, "grad_norm": 0.8329766576540251, "learning_rate": 9.999984675958169e-06, "loss": 0.6366, "step": 483 }, { "epoch": 0.06, "grad_norm": 0.8074101110714306, "learning_rate": 9.999982015535834e-06, "loss": 0.6583, "step": 484 }, { "epoch": 0.06, "grad_norm": 0.6613443421470934, "learning_rate": 9.999979142280246e-06, "loss": 0.6069, "step": 485 }, { "epoch": 0.06, "grad_norm": 0.7004874091363438, "learning_rate": 9.999976056191519e-06, "loss": 0.5727, "step": 486 }, { "epoch": 0.06, "grad_norm": 0.8654496022656083, "learning_rate": 9.999972757269792e-06, "loss": 0.6838, "step": 487 }, { "epoch": 0.06, "grad_norm": 0.723092832411239, "learning_rate": 9.9999692455152e-06, "loss": 0.5972, "step": 488 }, { "epoch": 0.06, "grad_norm": 0.718562574605943, "learning_rate": 9.999965520927895e-06, "loss": 0.5875, "step": 489 }, { "epoch": 0.06, "grad_norm": 0.6778690028989336, "learning_rate": 9.999961583508035e-06, "loss": 0.5754, "step": 490 }, { "epoch": 0.06, "grad_norm": 0.8718546884186662, "learning_rate": 9.99995743325579e-06, "loss": 0.6625, "step": 491 }, { "epoch": 0.06, "grad_norm": 0.7923702690792669, "learning_rate": 9.999953070171334e-06, "loss": 0.6471, "step": 492 }, { "epoch": 0.06, "grad_norm": 0.7507109521830716, "learning_rate": 9.999948494254853e-06, "loss": 0.63, "step": 493 }, { "epoch": 0.06, "grad_norm": 0.7205764419308681, "learning_rate": 9.999943705506544e-06, "loss": 0.5992, "step": 494 }, { "epoch": 0.06, "grad_norm": 0.6601109934385242, "learning_rate": 9.999938703926607e-06, "loss": 0.528, "step": 495 }, { "epoch": 0.06, "grad_norm": 0.6786762625720932, "learning_rate": 9.999933489515257e-06, "loss": 0.5651, "step": 496 }, { "epoch": 0.06, "grad_norm": 0.7622149354723696, "learning_rate": 9.999928062272714e-06, "loss": 0.6337, "step": 497 }, { "epoch": 0.06, "grad_norm": 0.9885600548836091, "learning_rate": 9.999922422199213e-06, "loss": 0.6804, "step": 498 }, { "epoch": 0.06, "grad_norm": 0.6422403188821002, "learning_rate": 9.999916569294994e-06, "loss": 0.5432, "step": 499 }, { "epoch": 0.06, "grad_norm": 0.7370948573188223, "learning_rate": 9.9999105035603e-06, "loss": 0.5625, "step": 500 }, { "epoch": 0.06, "grad_norm": 0.7935001229962054, "learning_rate": 9.999904224995397e-06, "loss": 0.594, "step": 501 }, { "epoch": 0.06, "grad_norm": 0.8847636308919531, "learning_rate": 9.999897733600545e-06, "loss": 0.6236, "step": 502 }, { "epoch": 0.06, "grad_norm": 0.6425009679367006, "learning_rate": 9.999891029376025e-06, "loss": 0.6326, "step": 503 }, { "epoch": 0.06, "grad_norm": 0.8989107169408984, "learning_rate": 9.999884112322122e-06, "loss": 0.6011, "step": 504 }, { "epoch": 0.06, "grad_norm": 0.8916440421695172, "learning_rate": 9.999876982439131e-06, "loss": 0.698, "step": 505 }, { "epoch": 0.06, "grad_norm": 0.8724879286301799, "learning_rate": 9.999869639727353e-06, "loss": 0.6846, "step": 506 }, { "epoch": 0.06, "grad_norm": 0.7649817263081151, "learning_rate": 9.999862084187101e-06, "loss": 0.5864, "step": 507 }, { "epoch": 0.06, "grad_norm": 0.718616029914458, "learning_rate": 9.999854315818697e-06, "loss": 0.5718, "step": 508 }, { "epoch": 0.06, "grad_norm": 0.8222526275223059, "learning_rate": 9.999846334622474e-06, "loss": 0.5788, "step": 509 }, { "epoch": 0.06, "grad_norm": 2.130838876693734, "learning_rate": 9.999838140598768e-06, "loss": 0.6831, "step": 510 }, { "epoch": 0.07, "grad_norm": 0.6932187212337148, "learning_rate": 9.99982973374793e-06, "loss": 0.5836, "step": 511 }, { "epoch": 0.07, "grad_norm": 0.8214432283624149, "learning_rate": 9.999821114070318e-06, "loss": 0.6369, "step": 512 }, { "epoch": 0.07, "grad_norm": 0.6525866559128067, "learning_rate": 9.999812281566298e-06, "loss": 0.5711, "step": 513 }, { "epoch": 0.07, "grad_norm": 0.7239474453362629, "learning_rate": 9.999803236236246e-06, "loss": 0.5785, "step": 514 }, { "epoch": 0.07, "grad_norm": 0.8555078103180713, "learning_rate": 9.999793978080548e-06, "loss": 0.6511, "step": 515 }, { "epoch": 0.07, "grad_norm": 0.732197985942794, "learning_rate": 9.999784507099598e-06, "loss": 0.5622, "step": 516 }, { "epoch": 0.07, "grad_norm": 0.8622453933353151, "learning_rate": 9.999774823293794e-06, "loss": 0.6547, "step": 517 }, { "epoch": 0.07, "grad_norm": 0.9183637108699996, "learning_rate": 9.999764926663558e-06, "loss": 0.6939, "step": 518 }, { "epoch": 0.07, "grad_norm": 0.8389181768232583, "learning_rate": 9.999754817209305e-06, "loss": 0.5699, "step": 519 }, { "epoch": 0.07, "grad_norm": 0.6502745124123062, "learning_rate": 9.999744494931465e-06, "loss": 0.6191, "step": 520 }, { "epoch": 0.07, "grad_norm": 0.6886355269499652, "learning_rate": 9.99973395983048e-06, "loss": 0.5393, "step": 521 }, { "epoch": 0.07, "grad_norm": 0.6612716942626704, "learning_rate": 9.999723211906796e-06, "loss": 0.6018, "step": 522 }, { "epoch": 0.07, "grad_norm": 0.6501186337772761, "learning_rate": 9.999712251160871e-06, "loss": 0.5573, "step": 523 }, { "epoch": 0.07, "grad_norm": 0.7706352731103632, "learning_rate": 9.999701077593174e-06, "loss": 0.6581, "step": 524 }, { "epoch": 0.07, "grad_norm": 0.7730012720034379, "learning_rate": 9.999689691204179e-06, "loss": 0.5962, "step": 525 }, { "epoch": 0.07, "grad_norm": 0.9508155364434, "learning_rate": 9.99967809199437e-06, "loss": 0.6623, "step": 526 }, { "epoch": 0.07, "grad_norm": 0.8060102874185446, "learning_rate": 9.999666279964242e-06, "loss": 0.6761, "step": 527 }, { "epoch": 0.07, "grad_norm": 0.7066786391666761, "learning_rate": 9.999654255114295e-06, "loss": 0.5771, "step": 528 }, { "epoch": 0.07, "grad_norm": 0.6828924290413217, "learning_rate": 9.999642017445045e-06, "loss": 0.62, "step": 529 }, { "epoch": 0.07, "grad_norm": 0.7222614445163584, "learning_rate": 9.99962956695701e-06, "loss": 0.6126, "step": 530 }, { "epoch": 0.07, "grad_norm": 0.9046138767923666, "learning_rate": 9.999616903650722e-06, "loss": 0.5865, "step": 531 }, { "epoch": 0.07, "grad_norm": 0.7886204704955198, "learning_rate": 9.999604027526717e-06, "loss": 0.65, "step": 532 }, { "epoch": 0.07, "grad_norm": 1.0075595298045992, "learning_rate": 9.999590938585546e-06, "loss": 0.69, "step": 533 }, { "epoch": 0.07, "grad_norm": 0.8124456029009315, "learning_rate": 9.999577636827766e-06, "loss": 0.6025, "step": 534 }, { "epoch": 0.07, "grad_norm": 0.6329892453569808, "learning_rate": 9.99956412225394e-06, "loss": 0.5329, "step": 535 }, { "epoch": 0.07, "grad_norm": 0.7952247674357384, "learning_rate": 9.999550394864647e-06, "loss": 0.5825, "step": 536 }, { "epoch": 0.07, "grad_norm": 0.8465928631531884, "learning_rate": 9.99953645466047e-06, "loss": 0.6563, "step": 537 }, { "epoch": 0.07, "grad_norm": 0.8713340741755371, "learning_rate": 9.999522301642004e-06, "loss": 0.6791, "step": 538 }, { "epoch": 0.07, "grad_norm": 0.9350628744751012, "learning_rate": 9.999507935809848e-06, "loss": 0.5694, "step": 539 }, { "epoch": 0.07, "grad_norm": 0.7993387054659695, "learning_rate": 9.999493357164616e-06, "loss": 0.5733, "step": 540 }, { "epoch": 0.07, "grad_norm": 0.6539378896746085, "learning_rate": 9.999478565706927e-06, "loss": 0.5551, "step": 541 }, { "epoch": 0.07, "grad_norm": 0.8243934943005577, "learning_rate": 9.999463561437412e-06, "loss": 0.624, "step": 542 }, { "epoch": 0.07, "grad_norm": 0.7133367340956287, "learning_rate": 9.999448344356709e-06, "loss": 0.5759, "step": 543 }, { "epoch": 0.07, "grad_norm": 0.6729087774343463, "learning_rate": 9.999432914465466e-06, "loss": 0.5409, "step": 544 }, { "epoch": 0.07, "grad_norm": 1.008624253443484, "learning_rate": 9.99941727176434e-06, "loss": 0.6462, "step": 545 }, { "epoch": 0.07, "grad_norm": 0.7191947866708757, "learning_rate": 9.999401416253997e-06, "loss": 0.5832, "step": 546 }, { "epoch": 0.07, "grad_norm": 0.6854202092498776, "learning_rate": 9.99938534793511e-06, "loss": 0.5299, "step": 547 }, { "epoch": 0.07, "grad_norm": 0.6434247509405175, "learning_rate": 9.999369066808366e-06, "loss": 0.5476, "step": 548 }, { "epoch": 0.07, "grad_norm": 0.6140147352500417, "learning_rate": 9.999352572874457e-06, "loss": 0.5594, "step": 549 }, { "epoch": 0.07, "grad_norm": 0.7705661429097387, "learning_rate": 9.999335866134084e-06, "loss": 0.6459, "step": 550 }, { "epoch": 0.07, "grad_norm": 0.8157044716675759, "learning_rate": 9.999318946587957e-06, "loss": 0.6638, "step": 551 }, { "epoch": 0.07, "grad_norm": 1.5025190176934549, "learning_rate": 9.9993018142368e-06, "loss": 0.6143, "step": 552 }, { "epoch": 0.07, "grad_norm": 0.6499419604536284, "learning_rate": 9.999284469081338e-06, "loss": 0.5732, "step": 553 }, { "epoch": 0.07, "grad_norm": 0.7374540089225985, "learning_rate": 9.999266911122314e-06, "loss": 0.5951, "step": 554 }, { "epoch": 0.07, "grad_norm": 0.6667930700380424, "learning_rate": 9.999249140360473e-06, "loss": 0.576, "step": 555 }, { "epoch": 0.07, "grad_norm": 1.0162824245358222, "learning_rate": 9.99923115679657e-06, "loss": 0.7426, "step": 556 }, { "epoch": 0.07, "grad_norm": 0.6305009829157838, "learning_rate": 9.999212960431372e-06, "loss": 0.5965, "step": 557 }, { "epoch": 0.07, "grad_norm": 0.823049052290893, "learning_rate": 9.999194551265653e-06, "loss": 0.6681, "step": 558 }, { "epoch": 0.07, "grad_norm": 0.7925088954710804, "learning_rate": 9.9991759293002e-06, "loss": 0.6394, "step": 559 }, { "epoch": 0.07, "grad_norm": 0.6887087221153546, "learning_rate": 9.9991570945358e-06, "loss": 0.5601, "step": 560 }, { "epoch": 0.07, "grad_norm": 0.631151150092096, "learning_rate": 9.999138046973258e-06, "loss": 0.5477, "step": 561 }, { "epoch": 0.07, "grad_norm": 0.8605832852629615, "learning_rate": 9.999118786613384e-06, "loss": 0.6283, "step": 562 }, { "epoch": 0.07, "grad_norm": 0.739203631837093, "learning_rate": 9.999099313457e-06, "loss": 0.6127, "step": 563 }, { "epoch": 0.07, "grad_norm": 0.6596350627733387, "learning_rate": 9.999079627504931e-06, "loss": 0.5736, "step": 564 }, { "epoch": 0.07, "grad_norm": 1.4592063027815079, "learning_rate": 9.999059728758018e-06, "loss": 0.6749, "step": 565 }, { "epoch": 0.07, "grad_norm": 1.1222640617266986, "learning_rate": 9.999039617217108e-06, "loss": 0.7198, "step": 566 }, { "epoch": 0.07, "grad_norm": 0.6920355737275525, "learning_rate": 9.999019292883053e-06, "loss": 0.612, "step": 567 }, { "epoch": 0.07, "grad_norm": 0.76675542132742, "learning_rate": 9.998998755756724e-06, "loss": 0.6004, "step": 568 }, { "epoch": 0.07, "grad_norm": 0.8098835588375225, "learning_rate": 9.998978005838993e-06, "loss": 0.5792, "step": 569 }, { "epoch": 0.07, "grad_norm": 0.9330560815304642, "learning_rate": 9.99895704313074e-06, "loss": 0.6745, "step": 570 }, { "epoch": 0.07, "grad_norm": 0.7107217808643186, "learning_rate": 9.998935867632862e-06, "loss": 0.5645, "step": 571 }, { "epoch": 0.07, "grad_norm": 0.8628341720586082, "learning_rate": 9.998914479346258e-06, "loss": 0.5851, "step": 572 }, { "epoch": 0.07, "grad_norm": 1.812296064370831, "learning_rate": 9.998892878271839e-06, "loss": 0.6905, "step": 573 }, { "epoch": 0.07, "grad_norm": 0.8344977204345659, "learning_rate": 9.998871064410524e-06, "loss": 0.636, "step": 574 }, { "epoch": 0.07, "grad_norm": 0.6655544154860575, "learning_rate": 9.998849037763243e-06, "loss": 0.6042, "step": 575 }, { "epoch": 0.07, "grad_norm": 0.9039870258725992, "learning_rate": 9.998826798330932e-06, "loss": 0.7086, "step": 576 }, { "epoch": 0.07, "grad_norm": 0.6918627084041002, "learning_rate": 9.998804346114536e-06, "loss": 0.532, "step": 577 }, { "epoch": 0.07, "grad_norm": 0.6263729431215533, "learning_rate": 9.998781681115017e-06, "loss": 0.5717, "step": 578 }, { "epoch": 0.07, "grad_norm": 0.9054569474491742, "learning_rate": 9.998758803333333e-06, "loss": 0.6493, "step": 579 }, { "epoch": 0.07, "grad_norm": 0.7819365195426208, "learning_rate": 9.99873571277046e-06, "loss": 0.6549, "step": 580 }, { "epoch": 0.07, "grad_norm": 0.7845315517848321, "learning_rate": 9.998712409427382e-06, "loss": 0.6991, "step": 581 }, { "epoch": 0.07, "grad_norm": 1.0283213258141806, "learning_rate": 9.99868889330509e-06, "loss": 0.669, "step": 582 }, { "epoch": 0.07, "grad_norm": 0.9632780379974247, "learning_rate": 9.998665164404586e-06, "loss": 0.7075, "step": 583 }, { "epoch": 0.07, "grad_norm": 0.7756967072338767, "learning_rate": 9.99864122272688e-06, "loss": 0.5049, "step": 584 }, { "epoch": 0.07, "grad_norm": 0.9607889685736162, "learning_rate": 9.99861706827299e-06, "loss": 0.6408, "step": 585 }, { "epoch": 0.07, "grad_norm": 0.6789333840195599, "learning_rate": 9.998592701043945e-06, "loss": 0.5208, "step": 586 }, { "epoch": 0.07, "grad_norm": 0.7073847955888349, "learning_rate": 9.998568121040781e-06, "loss": 0.6186, "step": 587 }, { "epoch": 0.07, "grad_norm": 0.8511169500473535, "learning_rate": 9.998543328264544e-06, "loss": 0.6493, "step": 588 }, { "epoch": 0.08, "grad_norm": 0.6728585712796856, "learning_rate": 9.998518322716294e-06, "loss": 0.5862, "step": 589 }, { "epoch": 0.08, "grad_norm": 0.8205208526183553, "learning_rate": 9.99849310439709e-06, "loss": 0.6667, "step": 590 }, { "epoch": 0.08, "grad_norm": 0.7534012734900305, "learning_rate": 9.998467673308008e-06, "loss": 0.6106, "step": 591 }, { "epoch": 0.08, "grad_norm": 0.7119869181595472, "learning_rate": 9.998442029450128e-06, "loss": 0.5755, "step": 592 }, { "epoch": 0.08, "grad_norm": 0.8701971631151063, "learning_rate": 9.998416172824545e-06, "loss": 0.7085, "step": 593 }, { "epoch": 0.08, "grad_norm": 0.9227174594042213, "learning_rate": 9.998390103432359e-06, "loss": 0.6441, "step": 594 }, { "epoch": 0.08, "grad_norm": 0.7216308708654693, "learning_rate": 9.99836382127468e-06, "loss": 0.5922, "step": 595 }, { "epoch": 0.08, "grad_norm": 0.5848478019701097, "learning_rate": 9.998337326352623e-06, "loss": 0.5544, "step": 596 }, { "epoch": 0.08, "grad_norm": 0.951394179802008, "learning_rate": 9.99831061866732e-06, "loss": 0.6307, "step": 597 }, { "epoch": 0.08, "grad_norm": 0.6242384053692704, "learning_rate": 9.998283698219905e-06, "loss": 0.543, "step": 598 }, { "epoch": 0.08, "grad_norm": 0.7794169851853867, "learning_rate": 9.998256565011525e-06, "loss": 0.6081, "step": 599 }, { "epoch": 0.08, "grad_norm": 0.8420300848884748, "learning_rate": 9.998229219043336e-06, "loss": 0.5916, "step": 600 }, { "epoch": 0.08, "grad_norm": 0.704643197328675, "learning_rate": 9.9982016603165e-06, "loss": 0.5532, "step": 601 }, { "epoch": 0.08, "grad_norm": 0.6434413239654009, "learning_rate": 9.998173888832193e-06, "loss": 0.5382, "step": 602 }, { "epoch": 0.08, "grad_norm": 1.028914317228709, "learning_rate": 9.998145904591595e-06, "loss": 0.6082, "step": 603 }, { "epoch": 0.08, "grad_norm": 0.6454334681598273, "learning_rate": 9.998117707595898e-06, "loss": 0.5557, "step": 604 }, { "epoch": 0.08, "grad_norm": 0.6891323611593384, "learning_rate": 9.9980892978463e-06, "loss": 0.5964, "step": 605 }, { "epoch": 0.08, "grad_norm": 0.6483259854376953, "learning_rate": 9.998060675344015e-06, "loss": 0.5604, "step": 606 }, { "epoch": 0.08, "grad_norm": 0.640850410937195, "learning_rate": 9.998031840090257e-06, "loss": 0.5605, "step": 607 }, { "epoch": 0.08, "grad_norm": 0.7237150931450929, "learning_rate": 9.998002792086254e-06, "loss": 0.5944, "step": 608 }, { "epoch": 0.08, "grad_norm": 0.6996628810617805, "learning_rate": 9.997973531333245e-06, "loss": 0.5957, "step": 609 }, { "epoch": 0.08, "grad_norm": 0.6173241916508667, "learning_rate": 9.997944057832474e-06, "loss": 0.5626, "step": 610 }, { "epoch": 0.08, "grad_norm": 0.9164611669637491, "learning_rate": 9.997914371585195e-06, "loss": 0.677, "step": 611 }, { "epoch": 0.08, "grad_norm": 0.7285589497635658, "learning_rate": 9.997884472592672e-06, "loss": 0.5631, "step": 612 }, { "epoch": 0.08, "grad_norm": 0.9485477673636976, "learning_rate": 9.997854360856178e-06, "loss": 0.669, "step": 613 }, { "epoch": 0.08, "grad_norm": 0.7781838708302896, "learning_rate": 9.997824036376995e-06, "loss": 0.6623, "step": 614 }, { "epoch": 0.08, "grad_norm": 0.7898792801970184, "learning_rate": 9.997793499156414e-06, "loss": 0.6471, "step": 615 }, { "epoch": 0.08, "grad_norm": 0.8308443987749227, "learning_rate": 9.997762749195735e-06, "loss": 0.6877, "step": 616 }, { "epoch": 0.08, "grad_norm": 0.6310836629954972, "learning_rate": 9.997731786496265e-06, "loss": 0.5485, "step": 617 }, { "epoch": 0.08, "grad_norm": 1.2053576611133063, "learning_rate": 9.997700611059323e-06, "loss": 0.6664, "step": 618 }, { "epoch": 0.08, "grad_norm": 0.613546971387751, "learning_rate": 9.997669222886237e-06, "loss": 0.5233, "step": 619 }, { "epoch": 0.08, "grad_norm": 0.6745170096468901, "learning_rate": 9.997637621978341e-06, "loss": 0.5831, "step": 620 }, { "epoch": 0.08, "grad_norm": 0.8062537763927432, "learning_rate": 9.997605808336985e-06, "loss": 0.6216, "step": 621 }, { "epoch": 0.08, "grad_norm": 0.6924571937105224, "learning_rate": 9.997573781963517e-06, "loss": 0.5789, "step": 622 }, { "epoch": 0.08, "grad_norm": 1.181097370897103, "learning_rate": 9.997541542859305e-06, "loss": 0.6104, "step": 623 }, { "epoch": 0.08, "grad_norm": 0.5527977444336124, "learning_rate": 9.997509091025718e-06, "loss": 0.5088, "step": 624 }, { "epoch": 0.08, "grad_norm": 0.8535412593908939, "learning_rate": 9.99747642646414e-06, "loss": 0.6658, "step": 625 }, { "epoch": 0.08, "grad_norm": 0.8290599255212616, "learning_rate": 9.997443549175957e-06, "loss": 0.642, "step": 626 }, { "epoch": 0.08, "grad_norm": 0.6585395907225443, "learning_rate": 9.997410459162575e-06, "loss": 0.5996, "step": 627 }, { "epoch": 0.08, "grad_norm": 0.7643793451239477, "learning_rate": 9.997377156425398e-06, "loss": 0.6094, "step": 628 }, { "epoch": 0.08, "grad_norm": 0.5850937661332478, "learning_rate": 9.997343640965846e-06, "loss": 0.5058, "step": 629 }, { "epoch": 0.08, "grad_norm": 0.8798977741642525, "learning_rate": 9.997309912785343e-06, "loss": 0.6818, "step": 630 }, { "epoch": 0.08, "grad_norm": 0.7845680405955845, "learning_rate": 9.997275971885326e-06, "loss": 0.6399, "step": 631 }, { "epoch": 0.08, "grad_norm": 0.787892789994041, "learning_rate": 9.997241818267241e-06, "loss": 0.5836, "step": 632 }, { "epoch": 0.08, "grad_norm": 0.871851233272999, "learning_rate": 9.997207451932539e-06, "loss": 0.7112, "step": 633 }, { "epoch": 0.08, "grad_norm": 0.8369811460124821, "learning_rate": 9.997172872882685e-06, "loss": 0.6088, "step": 634 }, { "epoch": 0.08, "grad_norm": 0.7052094714802036, "learning_rate": 9.997138081119153e-06, "loss": 0.5751, "step": 635 }, { "epoch": 0.08, "grad_norm": 0.7405549824133622, "learning_rate": 9.997103076643418e-06, "loss": 0.5486, "step": 636 }, { "epoch": 0.08, "grad_norm": 0.7189672087528193, "learning_rate": 9.997067859456975e-06, "loss": 0.5817, "step": 637 }, { "epoch": 0.08, "grad_norm": 0.7107487540224503, "learning_rate": 9.997032429561321e-06, "loss": 0.5652, "step": 638 }, { "epoch": 0.08, "grad_norm": 0.8769897123204996, "learning_rate": 9.996996786957967e-06, "loss": 0.6672, "step": 639 }, { "epoch": 0.08, "grad_norm": 0.6401910642444841, "learning_rate": 9.996960931648425e-06, "loss": 0.5351, "step": 640 }, { "epoch": 0.08, "grad_norm": 0.6778235077811764, "learning_rate": 9.996924863634225e-06, "loss": 0.5921, "step": 641 }, { "epoch": 0.08, "grad_norm": 0.7011876136636904, "learning_rate": 9.996888582916902e-06, "loss": 0.5723, "step": 642 }, { "epoch": 0.08, "grad_norm": 1.0460066107180521, "learning_rate": 9.996852089497999e-06, "loss": 0.6511, "step": 643 }, { "epoch": 0.08, "grad_norm": 0.6826216750177364, "learning_rate": 9.996815383379072e-06, "loss": 0.5215, "step": 644 }, { "epoch": 0.08, "grad_norm": 0.6729038352173468, "learning_rate": 9.99677846456168e-06, "loss": 0.5201, "step": 645 }, { "epoch": 0.08, "grad_norm": 0.6388857321520266, "learning_rate": 9.996741333047398e-06, "loss": 0.5607, "step": 646 }, { "epoch": 0.08, "grad_norm": 0.7085204374891683, "learning_rate": 9.996703988837801e-06, "loss": 0.5963, "step": 647 }, { "epoch": 0.08, "grad_norm": 0.6648291904521154, "learning_rate": 9.996666431934486e-06, "loss": 0.5623, "step": 648 }, { "epoch": 0.08, "grad_norm": 0.791919546274351, "learning_rate": 9.996628662339046e-06, "loss": 0.6429, "step": 649 }, { "epoch": 0.08, "grad_norm": 0.8183198332608097, "learning_rate": 9.996590680053091e-06, "loss": 0.632, "step": 650 }, { "epoch": 0.08, "grad_norm": 0.806752433284419, "learning_rate": 9.99655248507824e-06, "loss": 0.6869, "step": 651 }, { "epoch": 0.08, "grad_norm": 0.6296693161494359, "learning_rate": 9.996514077416114e-06, "loss": 0.5906, "step": 652 }, { "epoch": 0.08, "grad_norm": 0.7090714175483955, "learning_rate": 9.996475457068351e-06, "loss": 0.5875, "step": 653 }, { "epoch": 0.08, "grad_norm": 1.0149882615272856, "learning_rate": 9.996436624036594e-06, "loss": 0.6216, "step": 654 }, { "epoch": 0.08, "grad_norm": 0.6379949441574454, "learning_rate": 9.996397578322497e-06, "loss": 0.5716, "step": 655 }, { "epoch": 0.08, "grad_norm": 0.8184009434954894, "learning_rate": 9.996358319927719e-06, "loss": 0.6285, "step": 656 }, { "epoch": 0.08, "grad_norm": 0.7292977782606443, "learning_rate": 9.996318848853936e-06, "loss": 0.5799, "step": 657 }, { "epoch": 0.08, "grad_norm": 0.6748165000254052, "learning_rate": 9.996279165102824e-06, "loss": 0.5708, "step": 658 }, { "epoch": 0.08, "grad_norm": 0.8797755071234167, "learning_rate": 9.996239268676075e-06, "loss": 0.6819, "step": 659 }, { "epoch": 0.08, "grad_norm": 0.7414200697991348, "learning_rate": 9.996199159575385e-06, "loss": 0.6368, "step": 660 }, { "epoch": 0.08, "grad_norm": 0.6793534011032392, "learning_rate": 9.996158837802463e-06, "loss": 0.6238, "step": 661 }, { "epoch": 0.08, "grad_norm": 0.6365931996356293, "learning_rate": 9.996118303359024e-06, "loss": 0.6239, "step": 662 }, { "epoch": 0.08, "grad_norm": 1.7063336894857222, "learning_rate": 9.996077556246795e-06, "loss": 0.7095, "step": 663 }, { "epoch": 0.08, "grad_norm": 0.8983126405033272, "learning_rate": 9.996036596467509e-06, "loss": 0.6375, "step": 664 }, { "epoch": 0.08, "grad_norm": 0.6875782930744684, "learning_rate": 9.995995424022911e-06, "loss": 0.5261, "step": 665 }, { "epoch": 0.08, "grad_norm": 0.6755263244361941, "learning_rate": 9.995954038914752e-06, "loss": 0.5926, "step": 666 }, { "epoch": 0.08, "grad_norm": 1.1520147850642788, "learning_rate": 9.995912441144794e-06, "loss": 0.7104, "step": 667 }, { "epoch": 0.09, "grad_norm": 0.8267163628665787, "learning_rate": 9.995870630714808e-06, "loss": 0.6427, "step": 668 }, { "epoch": 0.09, "grad_norm": 0.839777535046645, "learning_rate": 9.995828607626574e-06, "loss": 0.6504, "step": 669 }, { "epoch": 0.09, "grad_norm": 0.6164221903425307, "learning_rate": 9.995786371881882e-06, "loss": 0.5905, "step": 670 }, { "epoch": 0.09, "grad_norm": 0.9366572634947168, "learning_rate": 9.995743923482527e-06, "loss": 0.6999, "step": 671 }, { "epoch": 0.09, "grad_norm": 0.7084248721753169, "learning_rate": 9.995701262430317e-06, "loss": 0.5855, "step": 672 }, { "epoch": 0.09, "grad_norm": 0.8142888157647624, "learning_rate": 9.995658388727067e-06, "loss": 0.6274, "step": 673 }, { "epoch": 0.09, "grad_norm": 0.60095279503971, "learning_rate": 9.995615302374607e-06, "loss": 0.5436, "step": 674 }, { "epoch": 0.09, "grad_norm": 0.6355827884716699, "learning_rate": 9.995572003374765e-06, "loss": 0.565, "step": 675 }, { "epoch": 0.09, "grad_norm": 0.6002682697245667, "learning_rate": 9.995528491729386e-06, "loss": 0.5332, "step": 676 }, { "epoch": 0.09, "grad_norm": 0.7921462760949219, "learning_rate": 9.995484767440321e-06, "loss": 0.5832, "step": 677 }, { "epoch": 0.09, "grad_norm": 0.8446136275255393, "learning_rate": 9.995440830509437e-06, "loss": 0.6251, "step": 678 }, { "epoch": 0.09, "grad_norm": 0.578197473122897, "learning_rate": 9.995396680938599e-06, "loss": 0.5717, "step": 679 }, { "epoch": 0.09, "grad_norm": 0.5975808365786478, "learning_rate": 9.995352318729685e-06, "loss": 0.5526, "step": 680 }, { "epoch": 0.09, "grad_norm": 0.8402832168714275, "learning_rate": 9.995307743884587e-06, "loss": 0.6598, "step": 681 }, { "epoch": 0.09, "grad_norm": 0.5955051072932334, "learning_rate": 9.9952629564052e-06, "loss": 0.5324, "step": 682 }, { "epoch": 0.09, "grad_norm": 0.7020067178988065, "learning_rate": 9.995217956293433e-06, "loss": 0.5926, "step": 683 }, { "epoch": 0.09, "grad_norm": 0.6494717514572398, "learning_rate": 9.995172743551199e-06, "loss": 0.5969, "step": 684 }, { "epoch": 0.09, "grad_norm": 0.9143489878257325, "learning_rate": 9.995127318180424e-06, "loss": 0.6555, "step": 685 }, { "epoch": 0.09, "grad_norm": 0.721704853088111, "learning_rate": 9.99508168018304e-06, "loss": 0.5903, "step": 686 }, { "epoch": 0.09, "grad_norm": 0.6613346090827121, "learning_rate": 9.995035829560993e-06, "loss": 0.5517, "step": 687 }, { "epoch": 0.09, "grad_norm": 0.5711868712156565, "learning_rate": 9.994989766316232e-06, "loss": 0.5386, "step": 688 }, { "epoch": 0.09, "grad_norm": 0.6276912272892899, "learning_rate": 9.994943490450719e-06, "loss": 0.5614, "step": 689 }, { "epoch": 0.09, "grad_norm": 0.6781272192810728, "learning_rate": 9.99489700196642e-06, "loss": 0.6198, "step": 690 }, { "epoch": 0.09, "grad_norm": 1.6205918684127034, "learning_rate": 9.99485030086532e-06, "loss": 0.621, "step": 691 }, { "epoch": 0.09, "grad_norm": 1.0623697303149042, "learning_rate": 9.994803387149403e-06, "loss": 0.6651, "step": 692 }, { "epoch": 0.09, "grad_norm": 0.6625208597496124, "learning_rate": 9.994756260820668e-06, "loss": 0.5595, "step": 693 }, { "epoch": 0.09, "grad_norm": 0.908463009621757, "learning_rate": 9.994708921881118e-06, "loss": 0.6057, "step": 694 }, { "epoch": 0.09, "grad_norm": 0.7078131488575676, "learning_rate": 9.994661370332772e-06, "loss": 0.5556, "step": 695 }, { "epoch": 0.09, "grad_norm": 0.6595231667492926, "learning_rate": 9.99461360617765e-06, "loss": 0.538, "step": 696 }, { "epoch": 0.09, "grad_norm": 0.6282873366296898, "learning_rate": 9.99456562941779e-06, "loss": 0.5978, "step": 697 }, { "epoch": 0.09, "grad_norm": 0.8378046457361583, "learning_rate": 9.99451744005523e-06, "loss": 0.6169, "step": 698 }, { "epoch": 0.09, "grad_norm": 0.7321909573353417, "learning_rate": 9.994469038092021e-06, "loss": 0.633, "step": 699 }, { "epoch": 0.09, "grad_norm": 0.7138296810024128, "learning_rate": 9.994420423530226e-06, "loss": 0.5621, "step": 700 }, { "epoch": 0.09, "grad_norm": 0.8574973273647994, "learning_rate": 9.994371596371916e-06, "loss": 0.6913, "step": 701 }, { "epoch": 0.09, "grad_norm": 0.7042105564808486, "learning_rate": 9.994322556619165e-06, "loss": 0.5727, "step": 702 }, { "epoch": 0.09, "grad_norm": 0.8617735874889249, "learning_rate": 9.994273304274063e-06, "loss": 0.7115, "step": 703 }, { "epoch": 0.09, "grad_norm": 1.0008493088743105, "learning_rate": 9.994223839338704e-06, "loss": 0.6561, "step": 704 }, { "epoch": 0.09, "grad_norm": 0.7520301080440177, "learning_rate": 9.994174161815198e-06, "loss": 0.6137, "step": 705 }, { "epoch": 0.09, "grad_norm": 0.6516214975658886, "learning_rate": 9.994124271705654e-06, "loss": 0.5581, "step": 706 }, { "epoch": 0.09, "grad_norm": 1.9097192762638664, "learning_rate": 9.994074169012201e-06, "loss": 0.5854, "step": 707 }, { "epoch": 0.09, "grad_norm": 0.8444910501822214, "learning_rate": 9.99402385373697e-06, "loss": 0.5931, "step": 708 }, { "epoch": 0.09, "grad_norm": 0.6834082983943086, "learning_rate": 9.9939733258821e-06, "loss": 0.5772, "step": 709 }, { "epoch": 0.09, "grad_norm": 0.9681715674561516, "learning_rate": 9.993922585449745e-06, "loss": 0.6371, "step": 710 }, { "epoch": 0.09, "grad_norm": 0.9051820415048046, "learning_rate": 9.993871632442065e-06, "loss": 0.6557, "step": 711 }, { "epoch": 0.09, "grad_norm": 0.7707641503024311, "learning_rate": 9.993820466861225e-06, "loss": 0.6534, "step": 712 }, { "epoch": 0.09, "grad_norm": 1.2328567189492285, "learning_rate": 9.99376908870941e-06, "loss": 0.6417, "step": 713 }, { "epoch": 0.09, "grad_norm": 0.7516038633024393, "learning_rate": 9.993717497988797e-06, "loss": 0.6337, "step": 714 }, { "epoch": 0.09, "grad_norm": 0.7045844606273769, "learning_rate": 9.993665694701591e-06, "loss": 0.5938, "step": 715 }, { "epoch": 0.09, "grad_norm": 0.6691251105720674, "learning_rate": 9.993613678849994e-06, "loss": 0.5311, "step": 716 }, { "epoch": 0.09, "grad_norm": 0.8419025097296773, "learning_rate": 9.99356145043622e-06, "loss": 0.6553, "step": 717 }, { "epoch": 0.09, "grad_norm": 0.6814007274604756, "learning_rate": 9.993509009462491e-06, "loss": 0.5697, "step": 718 }, { "epoch": 0.09, "grad_norm": 0.5878337898826469, "learning_rate": 9.993456355931042e-06, "loss": 0.5071, "step": 719 }, { "epoch": 0.09, "grad_norm": 0.7779635187798183, "learning_rate": 9.993403489844112e-06, "loss": 0.6374, "step": 720 }, { "epoch": 0.09, "grad_norm": 0.8100625867571185, "learning_rate": 9.99335041120395e-06, "loss": 0.5503, "step": 721 }, { "epoch": 0.09, "grad_norm": 0.6768655524400484, "learning_rate": 9.99329712001282e-06, "loss": 0.5852, "step": 722 }, { "epoch": 0.09, "grad_norm": 0.7387183957894116, "learning_rate": 9.993243616272987e-06, "loss": 0.6057, "step": 723 }, { "epoch": 0.09, "grad_norm": 0.6497037461439694, "learning_rate": 9.993189899986731e-06, "loss": 0.5253, "step": 724 }, { "epoch": 0.09, "grad_norm": 0.8513723100526019, "learning_rate": 9.993135971156335e-06, "loss": 0.6671, "step": 725 }, { "epoch": 0.09, "grad_norm": 0.8503166815417971, "learning_rate": 9.993081829784098e-06, "loss": 0.7132, "step": 726 }, { "epoch": 0.09, "grad_norm": 0.7190913432055303, "learning_rate": 9.993027475872322e-06, "loss": 0.5251, "step": 727 }, { "epoch": 0.09, "grad_norm": 0.6644874297155562, "learning_rate": 9.992972909423321e-06, "loss": 0.5253, "step": 728 }, { "epoch": 0.09, "grad_norm": 0.8234238920892054, "learning_rate": 9.99291813043942e-06, "loss": 0.5972, "step": 729 }, { "epoch": 0.09, "grad_norm": 0.7352248725035845, "learning_rate": 9.992863138922949e-06, "loss": 0.6274, "step": 730 }, { "epoch": 0.09, "grad_norm": 0.7644661183425183, "learning_rate": 9.992807934876248e-06, "loss": 0.685, "step": 731 }, { "epoch": 0.09, "grad_norm": 0.7556809583325526, "learning_rate": 9.99275251830167e-06, "loss": 0.5298, "step": 732 }, { "epoch": 0.09, "grad_norm": 0.824620178065385, "learning_rate": 9.99269688920157e-06, "loss": 0.6469, "step": 733 }, { "epoch": 0.09, "grad_norm": 0.7232993765683309, "learning_rate": 9.992641047578319e-06, "loss": 0.5896, "step": 734 }, { "epoch": 0.09, "grad_norm": 0.6835725439560649, "learning_rate": 9.99258499343429e-06, "loss": 0.5439, "step": 735 }, { "epoch": 0.09, "grad_norm": 0.6363589148560473, "learning_rate": 9.992528726771875e-06, "loss": 0.5456, "step": 736 }, { "epoch": 0.09, "grad_norm": 0.656622508428008, "learning_rate": 9.992472247593466e-06, "loss": 0.5427, "step": 737 }, { "epoch": 0.09, "grad_norm": 1.3032360495897406, "learning_rate": 9.992415555901466e-06, "loss": 0.648, "step": 738 }, { "epoch": 0.09, "grad_norm": 0.7557967211581249, "learning_rate": 9.99235865169829e-06, "loss": 0.627, "step": 739 }, { "epoch": 0.09, "grad_norm": 0.6555105421279371, "learning_rate": 9.992301534986359e-06, "loss": 0.5434, "step": 740 }, { "epoch": 0.09, "grad_norm": 0.7438722139028712, "learning_rate": 9.992244205768104e-06, "loss": 0.5501, "step": 741 }, { "epoch": 0.09, "grad_norm": 0.7776058377148094, "learning_rate": 9.992186664045966e-06, "loss": 0.5536, "step": 742 }, { "epoch": 0.09, "grad_norm": 0.6041178167844797, "learning_rate": 9.992128909822395e-06, "loss": 0.5535, "step": 743 }, { "epoch": 0.09, "grad_norm": 0.6041064112548524, "learning_rate": 9.99207094309985e-06, "loss": 0.4954, "step": 744 }, { "epoch": 0.09, "grad_norm": 0.6782353751153032, "learning_rate": 9.992012763880797e-06, "loss": 0.6132, "step": 745 }, { "epoch": 0.1, "grad_norm": 0.5807807179807045, "learning_rate": 9.991954372167711e-06, "loss": 0.4822, "step": 746 }, { "epoch": 0.1, "grad_norm": 0.8909082671096811, "learning_rate": 9.991895767963082e-06, "loss": 0.6357, "step": 747 }, { "epoch": 0.1, "grad_norm": 0.6784308680180855, "learning_rate": 9.991836951269401e-06, "loss": 0.5967, "step": 748 }, { "epoch": 0.1, "grad_norm": 0.8851869588465735, "learning_rate": 9.99177792208917e-06, "loss": 0.6157, "step": 749 }, { "epoch": 0.1, "grad_norm": 0.80918196157227, "learning_rate": 9.991718680424906e-06, "loss": 0.6044, "step": 750 }, { "epoch": 0.1, "grad_norm": 0.7570481980607737, "learning_rate": 9.991659226279132e-06, "loss": 0.6415, "step": 751 }, { "epoch": 0.1, "grad_norm": 0.6209879202808876, "learning_rate": 9.991599559654372e-06, "loss": 0.5686, "step": 752 }, { "epoch": 0.1, "grad_norm": 0.5636213452795127, "learning_rate": 9.991539680553171e-06, "loss": 0.5461, "step": 753 }, { "epoch": 0.1, "grad_norm": 0.7768031019190919, "learning_rate": 9.991479588978077e-06, "loss": 0.6046, "step": 754 }, { "epoch": 0.1, "grad_norm": 0.8738921674332195, "learning_rate": 9.991419284931645e-06, "loss": 0.6363, "step": 755 }, { "epoch": 0.1, "grad_norm": 0.9867078491125648, "learning_rate": 9.991358768416449e-06, "loss": 0.5987, "step": 756 }, { "epoch": 0.1, "grad_norm": 0.6157519853697115, "learning_rate": 9.991298039435055e-06, "loss": 0.5974, "step": 757 }, { "epoch": 0.1, "grad_norm": 0.8817679833279981, "learning_rate": 9.991237097990057e-06, "loss": 0.5939, "step": 758 }, { "epoch": 0.1, "grad_norm": 0.6540140299071304, "learning_rate": 9.991175944084044e-06, "loss": 0.54, "step": 759 }, { "epoch": 0.1, "grad_norm": 0.7985386733836422, "learning_rate": 9.991114577719621e-06, "loss": 0.6122, "step": 760 }, { "epoch": 0.1, "grad_norm": 0.664628522098663, "learning_rate": 9.9910529988994e-06, "loss": 0.5446, "step": 761 }, { "epoch": 0.1, "grad_norm": 0.7417870718986781, "learning_rate": 9.990991207626e-06, "loss": 0.6229, "step": 762 }, { "epoch": 0.1, "grad_norm": 0.6035253140147998, "learning_rate": 9.990929203902056e-06, "loss": 0.5411, "step": 763 }, { "epoch": 0.1, "grad_norm": 0.7360947084341074, "learning_rate": 9.990866987730204e-06, "loss": 0.5483, "step": 764 }, { "epoch": 0.1, "grad_norm": 0.6659011890744978, "learning_rate": 9.990804559113093e-06, "loss": 0.509, "step": 765 }, { "epoch": 0.1, "grad_norm": 0.7637223777381329, "learning_rate": 9.990741918053376e-06, "loss": 0.601, "step": 766 }, { "epoch": 0.1, "grad_norm": 0.817278269228528, "learning_rate": 9.99067906455373e-06, "loss": 0.6557, "step": 767 }, { "epoch": 0.1, "grad_norm": 0.6655728021288501, "learning_rate": 9.99061599861682e-06, "loss": 0.5226, "step": 768 }, { "epoch": 0.1, "grad_norm": 0.9914790343979668, "learning_rate": 9.990552720245336e-06, "loss": 0.6255, "step": 769 }, { "epoch": 0.1, "grad_norm": 0.7603987378373237, "learning_rate": 9.99048922944197e-06, "loss": 0.5869, "step": 770 }, { "epoch": 0.1, "grad_norm": 0.8781397184181137, "learning_rate": 9.990425526209424e-06, "loss": 0.6306, "step": 771 }, { "epoch": 0.1, "grad_norm": 0.7860454394941983, "learning_rate": 9.990361610550412e-06, "loss": 0.6214, "step": 772 }, { "epoch": 0.1, "grad_norm": 0.6146630025732014, "learning_rate": 9.990297482467653e-06, "loss": 0.582, "step": 773 }, { "epoch": 0.1, "grad_norm": 0.6136480741260867, "learning_rate": 9.990233141963877e-06, "loss": 0.5521, "step": 774 }, { "epoch": 0.1, "grad_norm": 0.8202346777365337, "learning_rate": 9.990168589041821e-06, "loss": 0.6426, "step": 775 }, { "epoch": 0.1, "grad_norm": 0.8162614768870611, "learning_rate": 9.990103823704236e-06, "loss": 0.6168, "step": 776 }, { "epoch": 0.1, "grad_norm": 0.822384912582235, "learning_rate": 9.990038845953876e-06, "loss": 0.6485, "step": 777 }, { "epoch": 0.1, "grad_norm": 0.6787019985342974, "learning_rate": 9.98997365579351e-06, "loss": 0.5608, "step": 778 }, { "epoch": 0.1, "grad_norm": 0.6934141057209189, "learning_rate": 9.989908253225911e-06, "loss": 0.5967, "step": 779 }, { "epoch": 0.1, "grad_norm": 0.860039086305572, "learning_rate": 9.989842638253861e-06, "loss": 0.7043, "step": 780 }, { "epoch": 0.1, "grad_norm": 0.6735360534206339, "learning_rate": 9.989776810880156e-06, "loss": 0.5952, "step": 781 }, { "epoch": 0.1, "grad_norm": 0.6717695884994594, "learning_rate": 9.9897107711076e-06, "loss": 0.5797, "step": 782 }, { "epoch": 0.1, "grad_norm": 0.6516625962471542, "learning_rate": 9.989644518938998e-06, "loss": 0.5605, "step": 783 }, { "epoch": 0.1, "grad_norm": 0.9424134113324836, "learning_rate": 9.989578054377174e-06, "loss": 0.6219, "step": 784 }, { "epoch": 0.1, "grad_norm": 0.7819554334442894, "learning_rate": 9.989511377424957e-06, "loss": 0.5281, "step": 785 }, { "epoch": 0.1, "grad_norm": 0.8878934838991167, "learning_rate": 9.989444488085185e-06, "loss": 0.6686, "step": 786 }, { "epoch": 0.1, "grad_norm": 0.5945890322248437, "learning_rate": 9.989377386360706e-06, "loss": 0.5234, "step": 787 }, { "epoch": 0.1, "grad_norm": 0.7394100402484586, "learning_rate": 9.989310072254375e-06, "loss": 0.5295, "step": 788 }, { "epoch": 0.1, "grad_norm": 0.6550729403258995, "learning_rate": 9.989242545769056e-06, "loss": 0.5895, "step": 789 }, { "epoch": 0.1, "grad_norm": 0.8327940750285137, "learning_rate": 9.989174806907627e-06, "loss": 0.672, "step": 790 }, { "epoch": 0.1, "grad_norm": 0.9044076708722738, "learning_rate": 9.989106855672968e-06, "loss": 0.6254, "step": 791 }, { "epoch": 0.1, "grad_norm": 0.840277815762319, "learning_rate": 9.989038692067974e-06, "loss": 0.6586, "step": 792 }, { "epoch": 0.1, "grad_norm": 0.6758925195276294, "learning_rate": 9.988970316095547e-06, "loss": 0.5875, "step": 793 }, { "epoch": 0.1, "grad_norm": 0.8431847504791554, "learning_rate": 9.988901727758594e-06, "loss": 0.6252, "step": 794 }, { "epoch": 0.1, "grad_norm": 3.227123737024226, "learning_rate": 9.988832927060038e-06, "loss": 0.741, "step": 795 }, { "epoch": 0.1, "grad_norm": 0.8854218475472951, "learning_rate": 9.988763914002806e-06, "loss": 0.6091, "step": 796 }, { "epoch": 0.1, "grad_norm": 1.5903794308945736, "learning_rate": 9.988694688589836e-06, "loss": 0.6588, "step": 797 }, { "epoch": 0.1, "grad_norm": 0.7046035355578506, "learning_rate": 9.988625250824074e-06, "loss": 0.5571, "step": 798 }, { "epoch": 0.1, "grad_norm": 0.639894445052264, "learning_rate": 9.988555600708476e-06, "loss": 0.5296, "step": 799 }, { "epoch": 0.1, "grad_norm": 0.6306065126784005, "learning_rate": 9.98848573824601e-06, "loss": 0.5398, "step": 800 }, { "epoch": 0.1, "grad_norm": 0.8263720474269912, "learning_rate": 9.988415663439645e-06, "loss": 0.6316, "step": 801 }, { "epoch": 0.1, "grad_norm": 0.6595772593698904, "learning_rate": 9.988345376292366e-06, "loss": 0.4882, "step": 802 }, { "epoch": 0.1, "grad_norm": 0.9817781299679428, "learning_rate": 9.988274876807164e-06, "loss": 0.6646, "step": 803 }, { "epoch": 0.1, "grad_norm": 0.8816016131007491, "learning_rate": 9.988204164987042e-06, "loss": 0.6662, "step": 804 }, { "epoch": 0.1, "grad_norm": 0.6877624983632619, "learning_rate": 9.988133240835008e-06, "loss": 0.5817, "step": 805 }, { "epoch": 0.1, "grad_norm": 1.1979913318985735, "learning_rate": 9.988062104354083e-06, "loss": 0.6077, "step": 806 }, { "epoch": 0.1, "grad_norm": 0.818338647993114, "learning_rate": 9.987990755547293e-06, "loss": 0.662, "step": 807 }, { "epoch": 0.1, "grad_norm": 0.6286575467844161, "learning_rate": 9.987919194417675e-06, "loss": 0.5648, "step": 808 }, { "epoch": 0.1, "grad_norm": 0.668826912313625, "learning_rate": 9.987847420968278e-06, "loss": 0.5823, "step": 809 }, { "epoch": 0.1, "grad_norm": 0.8283074214210586, "learning_rate": 9.987775435202153e-06, "loss": 0.6711, "step": 810 }, { "epoch": 0.1, "grad_norm": 0.5467020703048837, "learning_rate": 9.987703237122366e-06, "loss": 0.515, "step": 811 }, { "epoch": 0.1, "grad_norm": 0.991593659635225, "learning_rate": 9.987630826731993e-06, "loss": 0.6609, "step": 812 }, { "epoch": 0.1, "grad_norm": 0.7969645835899805, "learning_rate": 9.987558204034114e-06, "loss": 0.6772, "step": 813 }, { "epoch": 0.1, "grad_norm": 0.8937788622731796, "learning_rate": 9.987485369031817e-06, "loss": 0.7098, "step": 814 }, { "epoch": 0.1, "grad_norm": 0.608424689955248, "learning_rate": 9.987412321728209e-06, "loss": 0.5166, "step": 815 }, { "epoch": 0.1, "grad_norm": 0.8267505483392998, "learning_rate": 9.987339062126394e-06, "loss": 0.6073, "step": 816 }, { "epoch": 0.1, "grad_norm": 0.6303518988797593, "learning_rate": 9.987265590229494e-06, "loss": 0.5204, "step": 817 }, { "epoch": 0.1, "grad_norm": 0.6519974039844972, "learning_rate": 9.987191906040634e-06, "loss": 0.5612, "step": 818 }, { "epoch": 0.1, "grad_norm": 0.6690501483398921, "learning_rate": 9.98711800956295e-06, "loss": 0.544, "step": 819 }, { "epoch": 0.1, "grad_norm": 0.6613313944575195, "learning_rate": 9.98704390079959e-06, "loss": 0.5547, "step": 820 }, { "epoch": 0.1, "grad_norm": 1.2262546884070182, "learning_rate": 9.986969579753706e-06, "loss": 0.6491, "step": 821 }, { "epoch": 0.1, "grad_norm": 0.6410866164962201, "learning_rate": 9.986895046428467e-06, "loss": 0.5322, "step": 822 }, { "epoch": 0.1, "grad_norm": 0.7479005045701804, "learning_rate": 9.986820300827038e-06, "loss": 0.5812, "step": 823 }, { "epoch": 0.1, "grad_norm": 0.8574836652317681, "learning_rate": 9.986745342952605e-06, "loss": 0.6477, "step": 824 }, { "epoch": 0.11, "grad_norm": 0.808972861030263, "learning_rate": 9.986670172808359e-06, "loss": 0.6282, "step": 825 }, { "epoch": 0.11, "grad_norm": 0.7115535529703887, "learning_rate": 9.9865947903975e-06, "loss": 0.499, "step": 826 }, { "epoch": 0.11, "grad_norm": 0.7978214995594192, "learning_rate": 9.986519195723233e-06, "loss": 0.649, "step": 827 }, { "epoch": 0.11, "grad_norm": 0.6958941154244207, "learning_rate": 9.986443388788781e-06, "loss": 0.5564, "step": 828 }, { "epoch": 0.11, "grad_norm": 0.8236487676418658, "learning_rate": 9.986367369597366e-06, "loss": 0.5772, "step": 829 }, { "epoch": 0.11, "grad_norm": 0.7713431504854724, "learning_rate": 9.986291138152227e-06, "loss": 0.572, "step": 830 }, { "epoch": 0.11, "grad_norm": 0.6744967713967663, "learning_rate": 9.986214694456609e-06, "loss": 0.5558, "step": 831 }, { "epoch": 0.11, "grad_norm": 0.7719304482937637, "learning_rate": 9.986138038513765e-06, "loss": 0.596, "step": 832 }, { "epoch": 0.11, "grad_norm": 0.6605250408593111, "learning_rate": 9.986061170326958e-06, "loss": 0.6089, "step": 833 }, { "epoch": 0.11, "grad_norm": 0.7431533406246252, "learning_rate": 9.98598408989946e-06, "loss": 0.544, "step": 834 }, { "epoch": 0.11, "grad_norm": 0.6748294392612384, "learning_rate": 9.98590679723455e-06, "loss": 0.5397, "step": 835 }, { "epoch": 0.11, "grad_norm": 0.7118939115746294, "learning_rate": 9.985829292335523e-06, "loss": 0.6153, "step": 836 }, { "epoch": 0.11, "grad_norm": 0.6076472785108435, "learning_rate": 9.985751575205676e-06, "loss": 0.5244, "step": 837 }, { "epoch": 0.11, "grad_norm": 0.8751842414285672, "learning_rate": 9.985673645848315e-06, "loss": 0.6215, "step": 838 }, { "epoch": 0.11, "grad_norm": 0.8569561121943189, "learning_rate": 9.985595504266758e-06, "loss": 0.609, "step": 839 }, { "epoch": 0.11, "grad_norm": 0.8646887324930379, "learning_rate": 9.985517150464335e-06, "loss": 0.7117, "step": 840 }, { "epoch": 0.11, "grad_norm": 0.7605808467254423, "learning_rate": 9.985438584444375e-06, "loss": 0.5617, "step": 841 }, { "epoch": 0.11, "grad_norm": 0.7730492847886752, "learning_rate": 9.985359806210229e-06, "loss": 0.6153, "step": 842 }, { "epoch": 0.11, "grad_norm": 0.6678351603223434, "learning_rate": 9.985280815765244e-06, "loss": 0.5506, "step": 843 }, { "epoch": 0.11, "grad_norm": 0.6927775493943853, "learning_rate": 9.985201613112788e-06, "loss": 0.5791, "step": 844 }, { "epoch": 0.11, "grad_norm": 0.8481325219175609, "learning_rate": 9.985122198256227e-06, "loss": 0.5568, "step": 845 }, { "epoch": 0.11, "grad_norm": 0.5960932300090971, "learning_rate": 9.985042571198947e-06, "loss": 0.5035, "step": 846 }, { "epoch": 0.11, "grad_norm": 0.875815844031176, "learning_rate": 9.984962731944332e-06, "loss": 0.659, "step": 847 }, { "epoch": 0.11, "grad_norm": 0.7295163582351202, "learning_rate": 9.984882680495784e-06, "loss": 0.6118, "step": 848 }, { "epoch": 0.11, "grad_norm": 0.7520869448489738, "learning_rate": 9.984802416856711e-06, "loss": 0.5664, "step": 849 }, { "epoch": 0.11, "grad_norm": 0.8502409940694009, "learning_rate": 9.984721941030528e-06, "loss": 0.6992, "step": 850 }, { "epoch": 0.11, "grad_norm": 0.7819413543841294, "learning_rate": 9.984641253020659e-06, "loss": 0.5361, "step": 851 }, { "epoch": 0.11, "grad_norm": 0.871924910992041, "learning_rate": 9.984560352830542e-06, "loss": 0.6563, "step": 852 }, { "epoch": 0.11, "grad_norm": 0.7098584975906701, "learning_rate": 9.98447924046362e-06, "loss": 0.5937, "step": 853 }, { "epoch": 0.11, "grad_norm": 0.6019504772808552, "learning_rate": 9.984397915923344e-06, "loss": 0.5419, "step": 854 }, { "epoch": 0.11, "grad_norm": 0.8800667340065077, "learning_rate": 9.984316379213175e-06, "loss": 0.6453, "step": 855 }, { "epoch": 0.11, "grad_norm": 0.9498703164426573, "learning_rate": 9.984234630336586e-06, "loss": 0.6814, "step": 856 }, { "epoch": 0.11, "grad_norm": 0.6556240441790118, "learning_rate": 9.984152669297058e-06, "loss": 0.5306, "step": 857 }, { "epoch": 0.11, "grad_norm": 0.9087981299754682, "learning_rate": 9.984070496098076e-06, "loss": 0.6071, "step": 858 }, { "epoch": 0.11, "grad_norm": 0.5963703935302598, "learning_rate": 9.983988110743141e-06, "loss": 0.5068, "step": 859 }, { "epoch": 0.11, "grad_norm": 0.7657075869969863, "learning_rate": 9.983905513235758e-06, "loss": 0.5896, "step": 860 }, { "epoch": 0.11, "grad_norm": 0.7436250195216065, "learning_rate": 9.983822703579445e-06, "loss": 0.5575, "step": 861 }, { "epoch": 0.11, "grad_norm": 0.6023354584527244, "learning_rate": 9.983739681777723e-06, "loss": 0.554, "step": 862 }, { "epoch": 0.11, "grad_norm": 1.1627523945571974, "learning_rate": 9.983656447834129e-06, "loss": 0.6125, "step": 863 }, { "epoch": 0.11, "grad_norm": 0.8267175821570756, "learning_rate": 9.983573001752208e-06, "loss": 0.6356, "step": 864 }, { "epoch": 0.11, "grad_norm": 0.6092360522288158, "learning_rate": 9.983489343535506e-06, "loss": 0.5403, "step": 865 }, { "epoch": 0.11, "grad_norm": 0.908618839583729, "learning_rate": 9.98340547318759e-06, "loss": 0.6543, "step": 866 }, { "epoch": 0.11, "grad_norm": 1.125831052765039, "learning_rate": 9.983321390712028e-06, "loss": 0.6917, "step": 867 }, { "epoch": 0.11, "grad_norm": 0.7682568756384482, "learning_rate": 9.983237096112397e-06, "loss": 0.6885, "step": 868 }, { "epoch": 0.11, "grad_norm": 0.6470432155642117, "learning_rate": 9.98315258939229e-06, "loss": 0.5309, "step": 869 }, { "epoch": 0.11, "grad_norm": 1.084456252229607, "learning_rate": 9.983067870555297e-06, "loss": 0.6374, "step": 870 }, { "epoch": 0.11, "grad_norm": 0.8018245820192976, "learning_rate": 9.98298293960503e-06, "loss": 0.5757, "step": 871 }, { "epoch": 0.11, "grad_norm": 0.9643134256057387, "learning_rate": 9.982897796545104e-06, "loss": 0.6722, "step": 872 }, { "epoch": 0.11, "grad_norm": 0.8580838189426214, "learning_rate": 9.982812441379141e-06, "loss": 0.6549, "step": 873 }, { "epoch": 0.11, "grad_norm": 0.7800072711229105, "learning_rate": 9.982726874110776e-06, "loss": 0.687, "step": 874 }, { "epoch": 0.11, "grad_norm": 1.0363934860983977, "learning_rate": 9.982641094743648e-06, "loss": 0.6925, "step": 875 }, { "epoch": 0.11, "grad_norm": 0.7987480249623445, "learning_rate": 9.982555103281413e-06, "loss": 0.6675, "step": 876 }, { "epoch": 0.11, "grad_norm": 0.6689629806115877, "learning_rate": 9.982468899727728e-06, "loss": 0.5386, "step": 877 }, { "epoch": 0.11, "grad_norm": 0.9533975614522542, "learning_rate": 9.982382484086263e-06, "loss": 0.6647, "step": 878 }, { "epoch": 0.11, "grad_norm": 0.7022065148065463, "learning_rate": 9.9822958563607e-06, "loss": 0.5639, "step": 879 }, { "epoch": 0.11, "grad_norm": 0.8822886302244307, "learning_rate": 9.98220901655472e-06, "loss": 0.6937, "step": 880 }, { "epoch": 0.11, "grad_norm": 0.9593099526128606, "learning_rate": 9.982121964672027e-06, "loss": 0.6368, "step": 881 }, { "epoch": 0.11, "grad_norm": 0.8376183826694517, "learning_rate": 9.98203470071632e-06, "loss": 0.5754, "step": 882 }, { "epoch": 0.11, "grad_norm": 0.8580188257672448, "learning_rate": 9.981947224691316e-06, "loss": 0.6551, "step": 883 }, { "epoch": 0.11, "grad_norm": 0.6865148549538115, "learning_rate": 9.98185953660074e-06, "loss": 0.6089, "step": 884 }, { "epoch": 0.11, "grad_norm": 0.749497520981886, "learning_rate": 9.981771636448323e-06, "loss": 0.5604, "step": 885 }, { "epoch": 0.11, "grad_norm": 1.0383945444894498, "learning_rate": 9.981683524237805e-06, "loss": 0.7119, "step": 886 }, { "epoch": 0.11, "grad_norm": 0.736463033196381, "learning_rate": 9.98159519997294e-06, "loss": 0.5953, "step": 887 }, { "epoch": 0.11, "grad_norm": 0.618337131288357, "learning_rate": 9.981506663657486e-06, "loss": 0.5805, "step": 888 }, { "epoch": 0.11, "grad_norm": 0.6831445190302305, "learning_rate": 9.981417915295213e-06, "loss": 0.5492, "step": 889 }, { "epoch": 0.11, "grad_norm": 0.8456877619742489, "learning_rate": 9.981328954889896e-06, "loss": 0.6944, "step": 890 }, { "epoch": 0.11, "grad_norm": 0.8339034668691195, "learning_rate": 9.981239782445325e-06, "loss": 0.6603, "step": 891 }, { "epoch": 0.11, "grad_norm": 0.8107358052589377, "learning_rate": 9.981150397965293e-06, "loss": 0.6619, "step": 892 }, { "epoch": 0.11, "grad_norm": 0.5933002724612979, "learning_rate": 9.981060801453605e-06, "loss": 0.5694, "step": 893 }, { "epoch": 0.11, "grad_norm": 0.703868613588881, "learning_rate": 9.980970992914079e-06, "loss": 0.5536, "step": 894 }, { "epoch": 0.11, "grad_norm": 1.0813069844773486, "learning_rate": 9.980880972350533e-06, "loss": 0.6424, "step": 895 }, { "epoch": 0.11, "grad_norm": 0.8435970727418484, "learning_rate": 9.980790739766801e-06, "loss": 0.6975, "step": 896 }, { "epoch": 0.11, "grad_norm": 0.6400389588171608, "learning_rate": 9.980700295166724e-06, "loss": 0.5398, "step": 897 }, { "epoch": 0.11, "grad_norm": 1.0833546007836785, "learning_rate": 9.98060963855415e-06, "loss": 0.7218, "step": 898 }, { "epoch": 0.11, "grad_norm": 0.6181242714505477, "learning_rate": 9.980518769932938e-06, "loss": 0.5475, "step": 899 }, { "epoch": 0.11, "grad_norm": 0.8244564941672826, "learning_rate": 9.980427689306962e-06, "loss": 0.6312, "step": 900 }, { "epoch": 0.11, "grad_norm": 0.9318019554131991, "learning_rate": 9.98033639668009e-06, "loss": 0.6611, "step": 901 }, { "epoch": 0.11, "grad_norm": 1.4396113769073684, "learning_rate": 9.980244892056216e-06, "loss": 0.711, "step": 902 }, { "epoch": 0.12, "grad_norm": 0.9624421894783178, "learning_rate": 9.980153175439229e-06, "loss": 0.6278, "step": 903 }, { "epoch": 0.12, "grad_norm": 0.6600662675780681, "learning_rate": 9.980061246833037e-06, "loss": 0.5735, "step": 904 }, { "epoch": 0.12, "grad_norm": 0.8117856760783336, "learning_rate": 9.979969106241551e-06, "loss": 0.6374, "step": 905 }, { "epoch": 0.12, "grad_norm": 0.6889655407137212, "learning_rate": 9.979876753668695e-06, "loss": 0.6147, "step": 906 }, { "epoch": 0.12, "grad_norm": 0.7440401528217445, "learning_rate": 9.979784189118398e-06, "loss": 0.5489, "step": 907 }, { "epoch": 0.12, "grad_norm": 0.7987482256491769, "learning_rate": 9.979691412594601e-06, "loss": 0.6265, "step": 908 }, { "epoch": 0.12, "grad_norm": 0.8769697592520569, "learning_rate": 9.979598424101253e-06, "loss": 0.6605, "step": 909 }, { "epoch": 0.12, "grad_norm": 0.5906352109625501, "learning_rate": 9.979505223642314e-06, "loss": 0.5676, "step": 910 }, { "epoch": 0.12, "grad_norm": 0.811207726950151, "learning_rate": 9.97941181122175e-06, "loss": 0.6785, "step": 911 }, { "epoch": 0.12, "grad_norm": 0.8088554766434953, "learning_rate": 9.979318186843536e-06, "loss": 0.6877, "step": 912 }, { "epoch": 0.12, "grad_norm": 0.8193905623642039, "learning_rate": 9.979224350511658e-06, "loss": 0.6505, "step": 913 }, { "epoch": 0.12, "grad_norm": 0.7438195458920674, "learning_rate": 9.979130302230112e-06, "loss": 0.5947, "step": 914 }, { "epoch": 0.12, "grad_norm": 0.8952939406382615, "learning_rate": 9.979036042002899e-06, "loss": 0.7302, "step": 915 }, { "epoch": 0.12, "grad_norm": 0.6503815007052984, "learning_rate": 9.978941569834033e-06, "loss": 0.574, "step": 916 }, { "epoch": 0.12, "grad_norm": 0.6709939905326485, "learning_rate": 9.978846885727536e-06, "loss": 0.5437, "step": 917 }, { "epoch": 0.12, "grad_norm": 0.8905563115820866, "learning_rate": 9.978751989687437e-06, "loss": 0.6812, "step": 918 }, { "epoch": 0.12, "grad_norm": 0.844480162747943, "learning_rate": 9.978656881717774e-06, "loss": 0.6143, "step": 919 }, { "epoch": 0.12, "grad_norm": 0.6506977904792782, "learning_rate": 9.978561561822598e-06, "loss": 0.5209, "step": 920 }, { "epoch": 0.12, "grad_norm": 0.7591810938046205, "learning_rate": 9.978466030005964e-06, "loss": 0.5882, "step": 921 }, { "epoch": 0.12, "grad_norm": 0.789580194552697, "learning_rate": 9.97837028627194e-06, "loss": 0.6269, "step": 922 }, { "epoch": 0.12, "grad_norm": 0.61162725932201, "learning_rate": 9.978274330624604e-06, "loss": 0.5384, "step": 923 }, { "epoch": 0.12, "grad_norm": 0.7958299395610733, "learning_rate": 9.978178163068035e-06, "loss": 0.6472, "step": 924 }, { "epoch": 0.12, "grad_norm": 0.718883200546543, "learning_rate": 9.978081783606332e-06, "loss": 0.5932, "step": 925 }, { "epoch": 0.12, "grad_norm": 1.0171229312281451, "learning_rate": 9.977985192243596e-06, "loss": 0.6725, "step": 926 }, { "epoch": 0.12, "grad_norm": 0.9366746409763742, "learning_rate": 9.977888388983935e-06, "loss": 0.7197, "step": 927 }, { "epoch": 0.12, "grad_norm": 0.6951243322287972, "learning_rate": 9.977791373831474e-06, "loss": 0.5666, "step": 928 }, { "epoch": 0.12, "grad_norm": 0.9492044887336627, "learning_rate": 9.97769414679034e-06, "loss": 0.6385, "step": 929 }, { "epoch": 0.12, "grad_norm": 0.6958637272327954, "learning_rate": 9.977596707864673e-06, "loss": 0.569, "step": 930 }, { "epoch": 0.12, "grad_norm": 0.6150529362764223, "learning_rate": 9.97749905705862e-06, "loss": 0.5312, "step": 931 }, { "epoch": 0.12, "grad_norm": 0.8477634045500256, "learning_rate": 9.97740119437634e-06, "loss": 0.6062, "step": 932 }, { "epoch": 0.12, "grad_norm": 1.8303457600093067, "learning_rate": 9.977303119821994e-06, "loss": 0.6604, "step": 933 }, { "epoch": 0.12, "grad_norm": 0.6807067924550453, "learning_rate": 9.977204833399761e-06, "loss": 0.5853, "step": 934 }, { "epoch": 0.12, "grad_norm": 0.7044752600345968, "learning_rate": 9.977106335113821e-06, "loss": 0.5931, "step": 935 }, { "epoch": 0.12, "grad_norm": 0.6435129007633353, "learning_rate": 9.977007624968371e-06, "loss": 0.5699, "step": 936 }, { "epoch": 0.12, "grad_norm": 0.6334797029424789, "learning_rate": 9.97690870296761e-06, "loss": 0.5531, "step": 937 }, { "epoch": 0.12, "grad_norm": 0.6622043096372564, "learning_rate": 9.976809569115749e-06, "loss": 0.6299, "step": 938 }, { "epoch": 0.12, "grad_norm": 0.6378979654875966, "learning_rate": 9.97671022341701e-06, "loss": 0.5171, "step": 939 }, { "epoch": 0.12, "grad_norm": 0.5915013537029304, "learning_rate": 9.976610665875616e-06, "loss": 0.5631, "step": 940 }, { "epoch": 0.12, "grad_norm": 0.6343711762016183, "learning_rate": 9.976510896495813e-06, "loss": 0.5886, "step": 941 }, { "epoch": 0.12, "grad_norm": 0.7789674759810145, "learning_rate": 9.976410915281842e-06, "loss": 0.648, "step": 942 }, { "epoch": 0.12, "grad_norm": 0.6231699029833191, "learning_rate": 9.97631072223796e-06, "loss": 0.5565, "step": 943 }, { "epoch": 0.12, "grad_norm": 0.8113065507507176, "learning_rate": 9.976210317368436e-06, "loss": 0.6295, "step": 944 }, { "epoch": 0.12, "grad_norm": 0.6421119369093602, "learning_rate": 9.976109700677537e-06, "loss": 0.5357, "step": 945 }, { "epoch": 0.12, "grad_norm": 0.6972200604773494, "learning_rate": 9.976008872169552e-06, "loss": 0.6487, "step": 946 }, { "epoch": 0.12, "grad_norm": 0.6898354852896688, "learning_rate": 9.975907831848768e-06, "loss": 0.5815, "step": 947 }, { "epoch": 0.12, "grad_norm": 1.375720273664557, "learning_rate": 9.97580657971949e-06, "loss": 0.6092, "step": 948 }, { "epoch": 0.12, "grad_norm": 0.8500198409185694, "learning_rate": 9.975705115786025e-06, "loss": 0.6139, "step": 949 }, { "epoch": 0.12, "grad_norm": 0.8683120723328192, "learning_rate": 9.975603440052694e-06, "loss": 0.6425, "step": 950 }, { "epoch": 0.12, "grad_norm": 0.593346455614026, "learning_rate": 9.975501552523822e-06, "loss": 0.5493, "step": 951 }, { "epoch": 0.12, "grad_norm": 0.8936501859503566, "learning_rate": 9.975399453203752e-06, "loss": 0.6537, "step": 952 }, { "epoch": 0.12, "grad_norm": 0.9860019505445827, "learning_rate": 9.975297142096825e-06, "loss": 0.7264, "step": 953 }, { "epoch": 0.12, "grad_norm": 0.8669851463587487, "learning_rate": 9.975194619207398e-06, "loss": 0.5254, "step": 954 }, { "epoch": 0.12, "grad_norm": 0.6487483507891224, "learning_rate": 9.975091884539833e-06, "loss": 0.5469, "step": 955 }, { "epoch": 0.12, "grad_norm": 0.7343751264180686, "learning_rate": 9.974988938098505e-06, "loss": 0.6526, "step": 956 }, { "epoch": 0.12, "grad_norm": 0.798451745274211, "learning_rate": 9.974885779887796e-06, "loss": 0.6238, "step": 957 }, { "epoch": 0.12, "grad_norm": 0.7944966330683679, "learning_rate": 9.974782409912098e-06, "loss": 0.6414, "step": 958 }, { "epoch": 0.12, "grad_norm": 0.7326192905022801, "learning_rate": 9.974678828175808e-06, "loss": 0.5782, "step": 959 }, { "epoch": 0.12, "grad_norm": 0.851409031464957, "learning_rate": 9.974575034683339e-06, "loss": 0.6633, "step": 960 }, { "epoch": 0.12, "grad_norm": 0.8329275260160559, "learning_rate": 9.974471029439106e-06, "loss": 0.6178, "step": 961 }, { "epoch": 0.12, "grad_norm": 0.8480645643861054, "learning_rate": 9.974366812447538e-06, "loss": 0.6868, "step": 962 }, { "epoch": 0.12, "grad_norm": 0.7403624291621149, "learning_rate": 9.974262383713069e-06, "loss": 0.585, "step": 963 }, { "epoch": 0.12, "grad_norm": 0.6057429652873727, "learning_rate": 9.974157743240146e-06, "loss": 0.5241, "step": 964 }, { "epoch": 0.12, "grad_norm": 0.723803797580167, "learning_rate": 9.974052891033226e-06, "loss": 0.5509, "step": 965 }, { "epoch": 0.12, "grad_norm": 0.7605102234841643, "learning_rate": 9.973947827096766e-06, "loss": 0.6096, "step": 966 }, { "epoch": 0.12, "grad_norm": 0.9964860694641393, "learning_rate": 9.973842551435243e-06, "loss": 0.6447, "step": 967 }, { "epoch": 0.12, "grad_norm": 0.7125726674264059, "learning_rate": 9.973737064053137e-06, "loss": 0.5714, "step": 968 }, { "epoch": 0.12, "grad_norm": 0.6447963992240072, "learning_rate": 9.973631364954937e-06, "loss": 0.5731, "step": 969 }, { "epoch": 0.12, "grad_norm": 0.8861228431851509, "learning_rate": 9.973525454145143e-06, "loss": 0.6725, "step": 970 }, { "epoch": 0.12, "grad_norm": 0.8599942129085489, "learning_rate": 9.973419331628265e-06, "loss": 0.619, "step": 971 }, { "epoch": 0.12, "grad_norm": 0.7870859596350437, "learning_rate": 9.973312997408817e-06, "loss": 0.6642, "step": 972 }, { "epoch": 0.12, "grad_norm": 0.8758457278522148, "learning_rate": 9.973206451491329e-06, "loss": 0.6678, "step": 973 }, { "epoch": 0.12, "grad_norm": 0.6407520295940252, "learning_rate": 9.973099693880332e-06, "loss": 0.5044, "step": 974 }, { "epoch": 0.12, "grad_norm": 0.6075628641057531, "learning_rate": 9.972992724580375e-06, "loss": 0.5654, "step": 975 }, { "epoch": 0.12, "grad_norm": 0.843137238932385, "learning_rate": 9.97288554359601e-06, "loss": 0.692, "step": 976 }, { "epoch": 0.12, "grad_norm": 0.8644489293636047, "learning_rate": 9.972778150931797e-06, "loss": 0.6415, "step": 977 }, { "epoch": 0.12, "grad_norm": 0.9426617416035463, "learning_rate": 9.972670546592307e-06, "loss": 0.6469, "step": 978 }, { "epoch": 0.12, "grad_norm": 0.64565198646652, "learning_rate": 9.972562730582125e-06, "loss": 0.5428, "step": 979 }, { "epoch": 0.12, "grad_norm": 0.6857282054523964, "learning_rate": 9.972454702905837e-06, "loss": 0.5881, "step": 980 }, { "epoch": 0.12, "grad_norm": 0.8631417246527289, "learning_rate": 9.97234646356804e-06, "loss": 0.6209, "step": 981 }, { "epoch": 0.13, "grad_norm": 0.8870029643336272, "learning_rate": 9.972238012573345e-06, "loss": 0.6494, "step": 982 }, { "epoch": 0.13, "grad_norm": 0.6632097527897141, "learning_rate": 9.972129349926368e-06, "loss": 0.5686, "step": 983 }, { "epoch": 0.13, "grad_norm": 0.6194036534760328, "learning_rate": 9.972020475631731e-06, "loss": 0.5366, "step": 984 }, { "epoch": 0.13, "grad_norm": 0.6992208227502703, "learning_rate": 9.971911389694072e-06, "loss": 0.5421, "step": 985 }, { "epoch": 0.13, "grad_norm": 0.820385052058671, "learning_rate": 9.971802092118032e-06, "loss": 0.6599, "step": 986 }, { "epoch": 0.13, "grad_norm": 0.6824016838728466, "learning_rate": 9.971692582908267e-06, "loss": 0.5739, "step": 987 }, { "epoch": 0.13, "grad_norm": 0.898698060195507, "learning_rate": 9.971582862069434e-06, "loss": 0.6914, "step": 988 }, { "epoch": 0.13, "grad_norm": 0.7831417966919729, "learning_rate": 9.971472929606206e-06, "loss": 0.637, "step": 989 }, { "epoch": 0.13, "grad_norm": 0.8710437767208958, "learning_rate": 9.971362785523261e-06, "loss": 0.6501, "step": 990 }, { "epoch": 0.13, "grad_norm": 0.9016651239427222, "learning_rate": 9.97125242982529e-06, "loss": 0.6262, "step": 991 }, { "epoch": 0.13, "grad_norm": 0.7706646260634279, "learning_rate": 9.971141862516988e-06, "loss": 0.641, "step": 992 }, { "epoch": 0.13, "grad_norm": 0.9426023139160854, "learning_rate": 9.971031083603061e-06, "loss": 0.6943, "step": 993 }, { "epoch": 0.13, "grad_norm": 0.6896168476557996, "learning_rate": 9.970920093088227e-06, "loss": 0.5623, "step": 994 }, { "epoch": 0.13, "grad_norm": 0.9728087762100552, "learning_rate": 9.970808890977211e-06, "loss": 0.7213, "step": 995 }, { "epoch": 0.13, "grad_norm": 0.9548926293067601, "learning_rate": 9.970697477274744e-06, "loss": 0.6837, "step": 996 }, { "epoch": 0.13, "grad_norm": 0.8069373684565228, "learning_rate": 9.970585851985569e-06, "loss": 0.616, "step": 997 }, { "epoch": 0.13, "grad_norm": 0.7443443251819689, "learning_rate": 9.970474015114437e-06, "loss": 0.5486, "step": 998 }, { "epoch": 0.13, "grad_norm": 0.8556421758965715, "learning_rate": 9.970361966666112e-06, "loss": 0.6649, "step": 999 }, { "epoch": 0.13, "grad_norm": 0.6362859396689061, "learning_rate": 9.97024970664536e-06, "loss": 0.5386, "step": 1000 }, { "epoch": 0.13, "grad_norm": 0.698702022519884, "learning_rate": 9.97013723505696e-06, "loss": 0.5146, "step": 1001 }, { "epoch": 0.13, "grad_norm": 0.646339997881583, "learning_rate": 9.970024551905701e-06, "loss": 0.5592, "step": 1002 }, { "epoch": 0.13, "grad_norm": 0.8423911491600581, "learning_rate": 9.969911657196378e-06, "loss": 0.6253, "step": 1003 }, { "epoch": 0.13, "grad_norm": 1.0160592301030402, "learning_rate": 9.969798550933799e-06, "loss": 0.6283, "step": 1004 }, { "epoch": 0.13, "grad_norm": 0.690352142959983, "learning_rate": 9.969685233122774e-06, "loss": 0.5628, "step": 1005 }, { "epoch": 0.13, "grad_norm": 1.0311787740806562, "learning_rate": 9.969571703768132e-06, "loss": 0.6159, "step": 1006 }, { "epoch": 0.13, "grad_norm": 0.7557814251185381, "learning_rate": 9.969457962874702e-06, "loss": 0.5594, "step": 1007 }, { "epoch": 0.13, "grad_norm": 0.785242310763266, "learning_rate": 9.969344010447326e-06, "loss": 0.6654, "step": 1008 }, { "epoch": 0.13, "grad_norm": 0.9067553933913962, "learning_rate": 9.969229846490857e-06, "loss": 0.6485, "step": 1009 }, { "epoch": 0.13, "grad_norm": 0.6216889007415037, "learning_rate": 9.969115471010152e-06, "loss": 0.5283, "step": 1010 }, { "epoch": 0.13, "grad_norm": 0.691634208557814, "learning_rate": 9.96900088401008e-06, "loss": 0.5518, "step": 1011 }, { "epoch": 0.13, "grad_norm": 0.7841277537304792, "learning_rate": 9.96888608549552e-06, "loss": 0.6263, "step": 1012 }, { "epoch": 0.13, "grad_norm": 0.8179078057475995, "learning_rate": 9.968771075471356e-06, "loss": 0.6032, "step": 1013 }, { "epoch": 0.13, "grad_norm": 0.9425520151640354, "learning_rate": 9.968655853942487e-06, "loss": 0.6576, "step": 1014 }, { "epoch": 0.13, "grad_norm": 0.8844954600716567, "learning_rate": 9.968540420913815e-06, "loss": 0.628, "step": 1015 }, { "epoch": 0.13, "grad_norm": 0.8832647227415215, "learning_rate": 9.968424776390254e-06, "loss": 0.6427, "step": 1016 }, { "epoch": 0.13, "grad_norm": 0.7438323830426561, "learning_rate": 9.968308920376726e-06, "loss": 0.6119, "step": 1017 }, { "epoch": 0.13, "grad_norm": 0.5971458444281293, "learning_rate": 9.968192852878166e-06, "loss": 0.5301, "step": 1018 }, { "epoch": 0.13, "grad_norm": 0.8894538345661622, "learning_rate": 9.968076573899513e-06, "loss": 0.6625, "step": 1019 }, { "epoch": 0.13, "grad_norm": 1.0120386073613519, "learning_rate": 9.967960083445714e-06, "loss": 0.6678, "step": 1020 }, { "epoch": 0.13, "grad_norm": 0.9919547610476247, "learning_rate": 9.96784338152173e-06, "loss": 0.7238, "step": 1021 }, { "epoch": 0.13, "grad_norm": 0.5978918140517522, "learning_rate": 9.967726468132529e-06, "loss": 0.5507, "step": 1022 }, { "epoch": 0.13, "grad_norm": 0.8751885204164667, "learning_rate": 9.967609343283084e-06, "loss": 0.6528, "step": 1023 }, { "epoch": 0.13, "grad_norm": 0.6618436429645297, "learning_rate": 9.967492006978386e-06, "loss": 0.5955, "step": 1024 }, { "epoch": 0.13, "grad_norm": 0.6316969474791747, "learning_rate": 9.967374459223426e-06, "loss": 0.5325, "step": 1025 }, { "epoch": 0.13, "grad_norm": 0.79104689602268, "learning_rate": 9.967256700023212e-06, "loss": 0.5868, "step": 1026 }, { "epoch": 0.13, "grad_norm": 1.011617657288393, "learning_rate": 9.96713872938275e-06, "loss": 0.6631, "step": 1027 }, { "epoch": 0.13, "grad_norm": 0.8170905485014265, "learning_rate": 9.967020547307065e-06, "loss": 0.6326, "step": 1028 }, { "epoch": 0.13, "grad_norm": 0.76207282808973, "learning_rate": 9.96690215380119e-06, "loss": 0.6174, "step": 1029 }, { "epoch": 0.13, "grad_norm": 0.72412968908377, "learning_rate": 9.96678354887016e-06, "loss": 0.5365, "step": 1030 }, { "epoch": 0.13, "grad_norm": 0.7450700003219325, "learning_rate": 9.966664732519026e-06, "loss": 0.6086, "step": 1031 }, { "epoch": 0.13, "grad_norm": 0.9396237903527469, "learning_rate": 9.966545704752845e-06, "loss": 0.6627, "step": 1032 }, { "epoch": 0.13, "grad_norm": 0.6340270017350794, "learning_rate": 9.966426465576687e-06, "loss": 0.5418, "step": 1033 }, { "epoch": 0.13, "grad_norm": 0.7629589665546378, "learning_rate": 9.966307014995623e-06, "loss": 0.6605, "step": 1034 }, { "epoch": 0.13, "grad_norm": 0.6688292179498095, "learning_rate": 9.966187353014739e-06, "loss": 0.5774, "step": 1035 }, { "epoch": 0.13, "grad_norm": 0.790214161776146, "learning_rate": 9.96606747963913e-06, "loss": 0.619, "step": 1036 }, { "epoch": 0.13, "grad_norm": 0.6345663154255697, "learning_rate": 9.965947394873896e-06, "loss": 0.596, "step": 1037 }, { "epoch": 0.13, "grad_norm": 0.7544768452896589, "learning_rate": 9.965827098724152e-06, "loss": 0.6468, "step": 1038 }, { "epoch": 0.13, "grad_norm": 0.6907460514008421, "learning_rate": 9.965706591195017e-06, "loss": 0.5965, "step": 1039 }, { "epoch": 0.13, "grad_norm": 0.6590134071145285, "learning_rate": 9.96558587229162e-06, "loss": 0.5376, "step": 1040 }, { "epoch": 0.13, "grad_norm": 0.6689845023180553, "learning_rate": 9.9654649420191e-06, "loss": 0.5715, "step": 1041 }, { "epoch": 0.13, "grad_norm": 0.7262232483291121, "learning_rate": 9.965343800382605e-06, "loss": 0.6246, "step": 1042 }, { "epoch": 0.13, "grad_norm": 0.772432713462555, "learning_rate": 9.965222447387291e-06, "loss": 0.5931, "step": 1043 }, { "epoch": 0.13, "grad_norm": 0.9080945357905905, "learning_rate": 9.965100883038323e-06, "loss": 0.6755, "step": 1044 }, { "epoch": 0.13, "grad_norm": 0.6537616023445731, "learning_rate": 9.964979107340878e-06, "loss": 0.5833, "step": 1045 }, { "epoch": 0.13, "grad_norm": 0.8274528250485428, "learning_rate": 9.964857120300138e-06, "loss": 0.6862, "step": 1046 }, { "epoch": 0.13, "grad_norm": 0.9268645173768608, "learning_rate": 9.964734921921296e-06, "loss": 0.6117, "step": 1047 }, { "epoch": 0.13, "grad_norm": 0.637947616361313, "learning_rate": 9.964612512209553e-06, "loss": 0.56, "step": 1048 }, { "epoch": 0.13, "grad_norm": 0.8345885037272234, "learning_rate": 9.964489891170122e-06, "loss": 0.6915, "step": 1049 }, { "epoch": 0.13, "grad_norm": 0.947682923968399, "learning_rate": 9.964367058808217e-06, "loss": 0.557, "step": 1050 }, { "epoch": 0.13, "grad_norm": 0.6394885257013123, "learning_rate": 9.964244015129071e-06, "loss": 0.5454, "step": 1051 }, { "epoch": 0.13, "grad_norm": 0.6039722828318739, "learning_rate": 9.964120760137922e-06, "loss": 0.5202, "step": 1052 }, { "epoch": 0.13, "grad_norm": 0.6552510421484344, "learning_rate": 9.963997293840017e-06, "loss": 0.5242, "step": 1053 }, { "epoch": 0.13, "grad_norm": 0.9393663003689945, "learning_rate": 9.963873616240607e-06, "loss": 0.6933, "step": 1054 }, { "epoch": 0.13, "grad_norm": 0.6995096683237148, "learning_rate": 9.96374972734496e-06, "loss": 0.5895, "step": 1055 }, { "epoch": 0.13, "grad_norm": 0.5909357598843684, "learning_rate": 9.96362562715835e-06, "loss": 0.5418, "step": 1056 }, { "epoch": 0.13, "grad_norm": 0.8842765522046706, "learning_rate": 9.963501315686057e-06, "loss": 0.6927, "step": 1057 }, { "epoch": 0.13, "grad_norm": 0.769690140762895, "learning_rate": 9.963376792933376e-06, "loss": 0.6504, "step": 1058 }, { "epoch": 0.13, "grad_norm": 0.8712214969714248, "learning_rate": 9.963252058905604e-06, "loss": 0.6754, "step": 1059 }, { "epoch": 0.14, "grad_norm": 0.7626277381140851, "learning_rate": 9.963127113608054e-06, "loss": 0.58, "step": 1060 }, { "epoch": 0.14, "grad_norm": 0.7415564123378365, "learning_rate": 9.963001957046041e-06, "loss": 0.5493, "step": 1061 }, { "epoch": 0.14, "grad_norm": 0.8252324047814299, "learning_rate": 9.962876589224894e-06, "loss": 0.6158, "step": 1062 }, { "epoch": 0.14, "grad_norm": 0.7740686906153754, "learning_rate": 9.96275101014995e-06, "loss": 0.647, "step": 1063 }, { "epoch": 0.14, "grad_norm": 0.6434890865213917, "learning_rate": 9.962625219826554e-06, "loss": 0.5146, "step": 1064 }, { "epoch": 0.14, "grad_norm": 0.6356266208073319, "learning_rate": 9.96249921826006e-06, "loss": 0.5051, "step": 1065 }, { "epoch": 0.14, "grad_norm": 0.6917109800463024, "learning_rate": 9.962373005455835e-06, "loss": 0.5306, "step": 1066 }, { "epoch": 0.14, "grad_norm": 0.6839785692218564, "learning_rate": 9.962246581419246e-06, "loss": 0.599, "step": 1067 }, { "epoch": 0.14, "grad_norm": 0.8413408456625441, "learning_rate": 9.962119946155678e-06, "loss": 0.6054, "step": 1068 }, { "epoch": 0.14, "grad_norm": 0.8825072950719561, "learning_rate": 9.96199309967052e-06, "loss": 0.6556, "step": 1069 }, { "epoch": 0.14, "grad_norm": 0.6568332605989884, "learning_rate": 9.961866041969172e-06, "loss": 0.5804, "step": 1070 }, { "epoch": 0.14, "grad_norm": 0.6701904019859587, "learning_rate": 9.961738773057044e-06, "loss": 0.5573, "step": 1071 }, { "epoch": 0.14, "grad_norm": 0.8403411171391452, "learning_rate": 9.96161129293955e-06, "loss": 0.5507, "step": 1072 }, { "epoch": 0.14, "grad_norm": 0.6320571860344364, "learning_rate": 9.96148360162212e-06, "loss": 0.4739, "step": 1073 }, { "epoch": 0.14, "grad_norm": 0.8241160117697117, "learning_rate": 9.961355699110188e-06, "loss": 0.6369, "step": 1074 }, { "epoch": 0.14, "grad_norm": 0.6775006836052057, "learning_rate": 9.961227585409194e-06, "loss": 0.5706, "step": 1075 }, { "epoch": 0.14, "grad_norm": 0.782967568295276, "learning_rate": 9.961099260524601e-06, "loss": 0.6293, "step": 1076 }, { "epoch": 0.14, "grad_norm": 0.9922788598511965, "learning_rate": 9.960970724461862e-06, "loss": 0.6214, "step": 1077 }, { "epoch": 0.14, "grad_norm": 0.6835477547128451, "learning_rate": 9.960841977226455e-06, "loss": 0.5708, "step": 1078 }, { "epoch": 0.14, "grad_norm": 0.7085800777580535, "learning_rate": 9.960713018823855e-06, "loss": 0.5775, "step": 1079 }, { "epoch": 0.14, "grad_norm": 0.8834974525895641, "learning_rate": 9.960583849259556e-06, "loss": 0.6351, "step": 1080 }, { "epoch": 0.14, "grad_norm": 0.7213832710568305, "learning_rate": 9.960454468539053e-06, "loss": 0.5675, "step": 1081 }, { "epoch": 0.14, "grad_norm": 0.8879443734615274, "learning_rate": 9.960324876667854e-06, "loss": 0.6596, "step": 1082 }, { "epoch": 0.14, "grad_norm": 0.7344950907712255, "learning_rate": 9.960195073651478e-06, "loss": 0.565, "step": 1083 }, { "epoch": 0.14, "grad_norm": 0.7007327711779928, "learning_rate": 9.960065059495446e-06, "loss": 0.5203, "step": 1084 }, { "epoch": 0.14, "grad_norm": 0.9642709628559569, "learning_rate": 9.959934834205296e-06, "loss": 0.5881, "step": 1085 }, { "epoch": 0.14, "grad_norm": 0.7909316044844384, "learning_rate": 9.95980439778657e-06, "loss": 0.5805, "step": 1086 }, { "epoch": 0.14, "grad_norm": 0.6601031474911905, "learning_rate": 9.95967375024482e-06, "loss": 0.5908, "step": 1087 }, { "epoch": 0.14, "grad_norm": 0.7576039583517902, "learning_rate": 9.959542891585606e-06, "loss": 0.5639, "step": 1088 }, { "epoch": 0.14, "grad_norm": 0.9212523958223972, "learning_rate": 9.9594118218145e-06, "loss": 0.6982, "step": 1089 }, { "epoch": 0.14, "grad_norm": 0.5924096392485172, "learning_rate": 9.959280540937082e-06, "loss": 0.5012, "step": 1090 }, { "epoch": 0.14, "grad_norm": 0.6641663266900463, "learning_rate": 9.959149048958938e-06, "loss": 0.5456, "step": 1091 }, { "epoch": 0.14, "grad_norm": 0.9091526004034339, "learning_rate": 9.959017345885666e-06, "loss": 0.7084, "step": 1092 }, { "epoch": 0.14, "grad_norm": 1.1011575016214283, "learning_rate": 9.958885431722874e-06, "loss": 0.6814, "step": 1093 }, { "epoch": 0.14, "grad_norm": 0.9750371123627987, "learning_rate": 9.958753306476172e-06, "loss": 0.6081, "step": 1094 }, { "epoch": 0.14, "grad_norm": 0.6333414917853868, "learning_rate": 9.95862097015119e-06, "loss": 0.5111, "step": 1095 }, { "epoch": 0.14, "grad_norm": 0.6731356071365201, "learning_rate": 9.95848842275356e-06, "loss": 0.5338, "step": 1096 }, { "epoch": 0.14, "grad_norm": 0.8756458617676428, "learning_rate": 9.95835566428892e-06, "loss": 0.5627, "step": 1097 }, { "epoch": 0.14, "grad_norm": 0.7144933715655397, "learning_rate": 9.958222694762926e-06, "loss": 0.6085, "step": 1098 }, { "epoch": 0.14, "grad_norm": 0.8363796515389963, "learning_rate": 9.958089514181236e-06, "loss": 0.586, "step": 1099 }, { "epoch": 0.14, "grad_norm": 1.027379405617287, "learning_rate": 9.95795612254952e-06, "loss": 0.6114, "step": 1100 }, { "epoch": 0.14, "grad_norm": 1.0292639883298103, "learning_rate": 9.957822519873453e-06, "loss": 0.6284, "step": 1101 }, { "epoch": 0.14, "grad_norm": 1.0574842183994442, "learning_rate": 9.957688706158725e-06, "loss": 0.5952, "step": 1102 }, { "epoch": 0.14, "grad_norm": 0.8483335753387509, "learning_rate": 9.957554681411032e-06, "loss": 0.6091, "step": 1103 }, { "epoch": 0.14, "grad_norm": 0.7428902607788493, "learning_rate": 9.957420445636077e-06, "loss": 0.6634, "step": 1104 }, { "epoch": 0.14, "grad_norm": 0.7026104738413125, "learning_rate": 9.957285998839577e-06, "loss": 0.5774, "step": 1105 }, { "epoch": 0.14, "grad_norm": 0.7245714299947138, "learning_rate": 9.957151341027251e-06, "loss": 0.6126, "step": 1106 }, { "epoch": 0.14, "grad_norm": 0.5621554383607955, "learning_rate": 9.957016472204834e-06, "loss": 0.5149, "step": 1107 }, { "epoch": 0.14, "grad_norm": 0.848785091007283, "learning_rate": 9.956881392378068e-06, "loss": 0.6135, "step": 1108 }, { "epoch": 0.14, "grad_norm": 0.6024732942844954, "learning_rate": 9.9567461015527e-06, "loss": 0.5038, "step": 1109 }, { "epoch": 0.14, "grad_norm": 0.7577805467470974, "learning_rate": 9.956610599734488e-06, "loss": 0.5623, "step": 1110 }, { "epoch": 0.14, "grad_norm": 0.6529702214325929, "learning_rate": 9.956474886929205e-06, "loss": 0.5759, "step": 1111 }, { "epoch": 0.14, "grad_norm": 0.8385138547411434, "learning_rate": 9.956338963142622e-06, "loss": 0.6259, "step": 1112 }, { "epoch": 0.14, "grad_norm": 0.6348340774981751, "learning_rate": 9.95620282838053e-06, "loss": 0.5491, "step": 1113 }, { "epoch": 0.14, "grad_norm": 0.8432930975231842, "learning_rate": 9.95606648264872e-06, "loss": 0.6528, "step": 1114 }, { "epoch": 0.14, "grad_norm": 0.646499926993693, "learning_rate": 9.955929925952996e-06, "loss": 0.5908, "step": 1115 }, { "epoch": 0.14, "grad_norm": 0.829058950554376, "learning_rate": 9.955793158299173e-06, "loss": 0.6599, "step": 1116 }, { "epoch": 0.14, "grad_norm": 0.613515823068835, "learning_rate": 9.955656179693073e-06, "loss": 0.4962, "step": 1117 }, { "epoch": 0.14, "grad_norm": 1.090777028471484, "learning_rate": 9.955518990140525e-06, "loss": 0.6454, "step": 1118 }, { "epoch": 0.14, "grad_norm": 0.5946182803339479, "learning_rate": 9.955381589647367e-06, "loss": 0.5602, "step": 1119 }, { "epoch": 0.14, "grad_norm": 0.8286584023219346, "learning_rate": 9.955243978219452e-06, "loss": 0.6051, "step": 1120 }, { "epoch": 0.14, "grad_norm": 0.6467996168545798, "learning_rate": 9.955106155862635e-06, "loss": 0.5473, "step": 1121 }, { "epoch": 0.14, "grad_norm": 0.5940894978116646, "learning_rate": 9.954968122582784e-06, "loss": 0.5338, "step": 1122 }, { "epoch": 0.14, "grad_norm": 0.8590824308861795, "learning_rate": 9.954829878385773e-06, "loss": 0.6326, "step": 1123 }, { "epoch": 0.14, "grad_norm": 0.6244978334185487, "learning_rate": 9.954691423277487e-06, "loss": 0.5246, "step": 1124 }, { "epoch": 0.14, "grad_norm": 0.5909229083578984, "learning_rate": 9.95455275726382e-06, "loss": 0.5258, "step": 1125 }, { "epoch": 0.14, "grad_norm": 0.7924079847665105, "learning_rate": 9.954413880350674e-06, "loss": 0.6057, "step": 1126 }, { "epoch": 0.14, "grad_norm": 1.008750003158618, "learning_rate": 9.954274792543963e-06, "loss": 0.6279, "step": 1127 }, { "epoch": 0.14, "grad_norm": 0.9275739014823893, "learning_rate": 9.954135493849605e-06, "loss": 0.6504, "step": 1128 }, { "epoch": 0.14, "grad_norm": 0.6588408665126066, "learning_rate": 9.95399598427353e-06, "loss": 0.5735, "step": 1129 }, { "epoch": 0.14, "grad_norm": 0.7338741263032064, "learning_rate": 9.953856263821677e-06, "loss": 0.536, "step": 1130 }, { "epoch": 0.14, "grad_norm": 0.6913121183957724, "learning_rate": 9.953716332499991e-06, "loss": 0.6244, "step": 1131 }, { "epoch": 0.14, "grad_norm": 0.9019267806465207, "learning_rate": 9.953576190314434e-06, "loss": 0.6787, "step": 1132 }, { "epoch": 0.14, "grad_norm": 0.6933534279255132, "learning_rate": 9.953435837270966e-06, "loss": 0.5362, "step": 1133 }, { "epoch": 0.14, "grad_norm": 0.8972807751072782, "learning_rate": 9.953295273375564e-06, "loss": 0.6749, "step": 1134 }, { "epoch": 0.14, "grad_norm": 0.6176012154418374, "learning_rate": 9.95315449863421e-06, "loss": 0.5268, "step": 1135 }, { "epoch": 0.14, "grad_norm": 0.9166086004029897, "learning_rate": 9.953013513052898e-06, "loss": 0.5797, "step": 1136 }, { "epoch": 0.14, "grad_norm": 0.7726218336683778, "learning_rate": 9.952872316637627e-06, "loss": 0.593, "step": 1137 }, { "epoch": 0.14, "grad_norm": 0.6059201439858781, "learning_rate": 9.952730909394409e-06, "loss": 0.5262, "step": 1138 }, { "epoch": 0.15, "grad_norm": 0.8709987021880209, "learning_rate": 9.952589291329262e-06, "loss": 0.6675, "step": 1139 }, { "epoch": 0.15, "grad_norm": 0.9166989103261511, "learning_rate": 9.952447462448219e-06, "loss": 0.6499, "step": 1140 }, { "epoch": 0.15, "grad_norm": 0.7545998650211926, "learning_rate": 9.952305422757308e-06, "loss": 0.6138, "step": 1141 }, { "epoch": 0.15, "grad_norm": 1.2410058081739495, "learning_rate": 9.952163172262583e-06, "loss": 0.6333, "step": 1142 }, { "epoch": 0.15, "grad_norm": 0.7992091222627921, "learning_rate": 9.952020710970098e-06, "loss": 0.5316, "step": 1143 }, { "epoch": 0.15, "grad_norm": 0.7289102533460955, "learning_rate": 9.951878038885914e-06, "loss": 0.537, "step": 1144 }, { "epoch": 0.15, "grad_norm": 0.6614206102741648, "learning_rate": 9.951735156016105e-06, "loss": 0.5623, "step": 1145 }, { "epoch": 0.15, "grad_norm": 0.8177142563654926, "learning_rate": 9.951592062366754e-06, "loss": 0.6382, "step": 1146 }, { "epoch": 0.15, "grad_norm": 0.7097636836053219, "learning_rate": 9.951448757943954e-06, "loss": 0.5972, "step": 1147 }, { "epoch": 0.15, "grad_norm": 0.9254677182285955, "learning_rate": 9.951305242753801e-06, "loss": 0.571, "step": 1148 }, { "epoch": 0.15, "grad_norm": 0.6988956392409752, "learning_rate": 9.951161516802408e-06, "loss": 0.5549, "step": 1149 }, { "epoch": 0.15, "grad_norm": 0.8134103090839735, "learning_rate": 9.951017580095889e-06, "loss": 0.5931, "step": 1150 }, { "epoch": 0.15, "grad_norm": 0.6059200480854988, "learning_rate": 9.950873432640373e-06, "loss": 0.5429, "step": 1151 }, { "epoch": 0.15, "grad_norm": 0.9848688757575335, "learning_rate": 9.950729074441998e-06, "loss": 0.6748, "step": 1152 }, { "epoch": 0.15, "grad_norm": 0.5763030200172503, "learning_rate": 9.950584505506904e-06, "loss": 0.495, "step": 1153 }, { "epoch": 0.15, "grad_norm": 0.7517038459961862, "learning_rate": 9.950439725841247e-06, "loss": 0.5802, "step": 1154 }, { "epoch": 0.15, "grad_norm": 1.0629583474210524, "learning_rate": 9.950294735451192e-06, "loss": 0.6841, "step": 1155 }, { "epoch": 0.15, "grad_norm": 0.7663206359698448, "learning_rate": 9.950149534342907e-06, "loss": 0.5319, "step": 1156 }, { "epoch": 0.15, "grad_norm": 0.7795779642312519, "learning_rate": 9.950004122522578e-06, "loss": 0.6035, "step": 1157 }, { "epoch": 0.15, "grad_norm": 1.178206918510157, "learning_rate": 9.949858499996389e-06, "loss": 0.6628, "step": 1158 }, { "epoch": 0.15, "grad_norm": 0.7391505252721015, "learning_rate": 9.949712666770541e-06, "loss": 0.5569, "step": 1159 }, { "epoch": 0.15, "grad_norm": 0.7583907860065018, "learning_rate": 9.949566622851243e-06, "loss": 0.5678, "step": 1160 }, { "epoch": 0.15, "grad_norm": 0.646869541150209, "learning_rate": 9.94942036824471e-06, "loss": 0.5737, "step": 1161 }, { "epoch": 0.15, "grad_norm": 0.794371583641433, "learning_rate": 9.949273902957169e-06, "loss": 0.6565, "step": 1162 }, { "epoch": 0.15, "grad_norm": 0.9050968642999875, "learning_rate": 9.94912722699485e-06, "loss": 0.5778, "step": 1163 }, { "epoch": 0.15, "grad_norm": 0.6189268621262112, "learning_rate": 9.948980340364002e-06, "loss": 0.5335, "step": 1164 }, { "epoch": 0.15, "grad_norm": 0.6192781388637013, "learning_rate": 9.948833243070877e-06, "loss": 0.5795, "step": 1165 }, { "epoch": 0.15, "grad_norm": 0.6997940420230017, "learning_rate": 9.948685935121735e-06, "loss": 0.5817, "step": 1166 }, { "epoch": 0.15, "grad_norm": 0.666587408485315, "learning_rate": 9.948538416522846e-06, "loss": 0.5358, "step": 1167 }, { "epoch": 0.15, "grad_norm": 0.922220574123095, "learning_rate": 9.948390687280489e-06, "loss": 0.6504, "step": 1168 }, { "epoch": 0.15, "grad_norm": 0.9291835324778818, "learning_rate": 9.948242747400953e-06, "loss": 0.6552, "step": 1169 }, { "epoch": 0.15, "grad_norm": 0.6609464921861429, "learning_rate": 9.948094596890536e-06, "loss": 0.5258, "step": 1170 }, { "epoch": 0.15, "grad_norm": 0.7432722982339321, "learning_rate": 9.947946235755545e-06, "loss": 0.518, "step": 1171 }, { "epoch": 0.15, "grad_norm": 0.9232759463515889, "learning_rate": 9.947797664002294e-06, "loss": 0.6911, "step": 1172 }, { "epoch": 0.15, "grad_norm": 0.9677536032809348, "learning_rate": 9.947648881637107e-06, "loss": 0.6149, "step": 1173 }, { "epoch": 0.15, "grad_norm": 1.0779556679380788, "learning_rate": 9.947499888666317e-06, "loss": 0.6383, "step": 1174 }, { "epoch": 0.15, "grad_norm": 0.6818092950991045, "learning_rate": 9.947350685096266e-06, "loss": 0.5611, "step": 1175 }, { "epoch": 0.15, "grad_norm": 0.6562011194061566, "learning_rate": 9.947201270933307e-06, "loss": 0.558, "step": 1176 }, { "epoch": 0.15, "grad_norm": 0.7506221775353435, "learning_rate": 9.947051646183798e-06, "loss": 0.6525, "step": 1177 }, { "epoch": 0.15, "grad_norm": 0.9732030857141276, "learning_rate": 9.946901810854109e-06, "loss": 0.6358, "step": 1178 }, { "epoch": 0.15, "grad_norm": 0.6719722587021625, "learning_rate": 9.94675176495062e-06, "loss": 0.5724, "step": 1179 }, { "epoch": 0.15, "grad_norm": 0.7277819505454043, "learning_rate": 9.946601508479714e-06, "loss": 0.5659, "step": 1180 }, { "epoch": 0.15, "grad_norm": 0.6024131898213151, "learning_rate": 9.946451041447788e-06, "loss": 0.5407, "step": 1181 }, { "epoch": 0.15, "grad_norm": 0.6194888085588076, "learning_rate": 9.94630036386125e-06, "loss": 0.531, "step": 1182 }, { "epoch": 0.15, "grad_norm": 0.819612771311822, "learning_rate": 9.946149475726509e-06, "loss": 0.6299, "step": 1183 }, { "epoch": 0.15, "grad_norm": 0.5988162409384846, "learning_rate": 9.945998377049992e-06, "loss": 0.5163, "step": 1184 }, { "epoch": 0.15, "grad_norm": 0.621047731739097, "learning_rate": 9.945847067838131e-06, "loss": 0.6432, "step": 1185 }, { "epoch": 0.15, "grad_norm": 0.6933969601897686, "learning_rate": 9.945695548097363e-06, "loss": 0.55, "step": 1186 }, { "epoch": 0.15, "grad_norm": 0.6215097677045021, "learning_rate": 9.94554381783414e-06, "loss": 0.627, "step": 1187 }, { "epoch": 0.15, "grad_norm": 0.9398154868265022, "learning_rate": 9.945391877054919e-06, "loss": 0.7062, "step": 1188 }, { "epoch": 0.15, "grad_norm": 0.9323473338926082, "learning_rate": 9.945239725766172e-06, "loss": 0.6333, "step": 1189 }, { "epoch": 0.15, "grad_norm": 0.9423907671305878, "learning_rate": 9.94508736397437e-06, "loss": 0.6752, "step": 1190 }, { "epoch": 0.15, "grad_norm": 0.9615586928806058, "learning_rate": 9.944934791686003e-06, "loss": 0.6944, "step": 1191 }, { "epoch": 0.15, "grad_norm": 0.6415203280411638, "learning_rate": 9.944782008907564e-06, "loss": 0.5337, "step": 1192 }, { "epoch": 0.15, "grad_norm": 0.6503610631443516, "learning_rate": 9.944629015645553e-06, "loss": 0.517, "step": 1193 }, { "epoch": 0.15, "grad_norm": 1.189574393483398, "learning_rate": 9.944475811906487e-06, "loss": 0.6663, "step": 1194 }, { "epoch": 0.15, "grad_norm": 0.8127292741305696, "learning_rate": 9.944322397696888e-06, "loss": 0.6214, "step": 1195 }, { "epoch": 0.15, "grad_norm": 0.9352628663178603, "learning_rate": 9.944168773023282e-06, "loss": 0.6445, "step": 1196 }, { "epoch": 0.15, "grad_norm": 0.8159532544926157, "learning_rate": 9.944014937892211e-06, "loss": 0.6347, "step": 1197 }, { "epoch": 0.15, "grad_norm": 0.7529396903292472, "learning_rate": 9.943860892310225e-06, "loss": 0.5708, "step": 1198 }, { "epoch": 0.15, "grad_norm": 0.7332699396889881, "learning_rate": 9.943706636283876e-06, "loss": 0.617, "step": 1199 }, { "epoch": 0.15, "grad_norm": 0.7065292946089605, "learning_rate": 9.943552169819734e-06, "loss": 0.5599, "step": 1200 }, { "epoch": 0.15, "grad_norm": 0.9523616116392661, "learning_rate": 9.943397492924377e-06, "loss": 0.6991, "step": 1201 }, { "epoch": 0.15, "grad_norm": 0.6718897182036292, "learning_rate": 9.943242605604381e-06, "loss": 0.5334, "step": 1202 }, { "epoch": 0.15, "grad_norm": 0.6255929956273176, "learning_rate": 9.943087507866345e-06, "loss": 0.5123, "step": 1203 }, { "epoch": 0.15, "grad_norm": 0.5863924057030013, "learning_rate": 9.94293219971687e-06, "loss": 0.5259, "step": 1204 }, { "epoch": 0.15, "grad_norm": 0.6129535666687689, "learning_rate": 9.942776681162566e-06, "loss": 0.5341, "step": 1205 }, { "epoch": 0.15, "grad_norm": 0.6738433228662217, "learning_rate": 9.942620952210057e-06, "loss": 0.5877, "step": 1206 }, { "epoch": 0.15, "grad_norm": 0.6691527608264151, "learning_rate": 9.942465012865964e-06, "loss": 0.6106, "step": 1207 }, { "epoch": 0.15, "grad_norm": 0.8947966968433815, "learning_rate": 9.94230886313693e-06, "loss": 0.6766, "step": 1208 }, { "epoch": 0.15, "grad_norm": 0.5937059305694171, "learning_rate": 9.942152503029603e-06, "loss": 0.5424, "step": 1209 }, { "epoch": 0.15, "grad_norm": 0.6666550628105659, "learning_rate": 9.941995932550636e-06, "loss": 0.6206, "step": 1210 }, { "epoch": 0.15, "grad_norm": 0.6855403367079185, "learning_rate": 9.941839151706694e-06, "loss": 0.5557, "step": 1211 }, { "epoch": 0.15, "grad_norm": 0.6996973493623373, "learning_rate": 9.941682160504452e-06, "loss": 0.638, "step": 1212 }, { "epoch": 0.15, "grad_norm": 0.8022251873817732, "learning_rate": 9.941524958950591e-06, "loss": 0.6535, "step": 1213 }, { "epoch": 0.15, "grad_norm": 0.8575600904950452, "learning_rate": 9.941367547051803e-06, "loss": 0.5889, "step": 1214 }, { "epoch": 0.15, "grad_norm": 0.7458489001018135, "learning_rate": 9.94120992481479e-06, "loss": 0.6282, "step": 1215 }, { "epoch": 0.15, "grad_norm": 0.7011274758384984, "learning_rate": 9.94105209224626e-06, "loss": 0.6035, "step": 1216 }, { "epoch": 0.16, "grad_norm": 0.778022051851357, "learning_rate": 9.940894049352932e-06, "loss": 0.6024, "step": 1217 }, { "epoch": 0.16, "grad_norm": 0.8229848811812075, "learning_rate": 9.940735796141533e-06, "loss": 0.7138, "step": 1218 }, { "epoch": 0.16, "grad_norm": 0.6693289356258347, "learning_rate": 9.940577332618798e-06, "loss": 0.6005, "step": 1219 }, { "epoch": 0.16, "grad_norm": 0.8768519648063721, "learning_rate": 9.940418658791475e-06, "loss": 0.6364, "step": 1220 }, { "epoch": 0.16, "grad_norm": 0.6210843299179503, "learning_rate": 9.940259774666316e-06, "loss": 0.5151, "step": 1221 }, { "epoch": 0.16, "grad_norm": 0.5928265232338573, "learning_rate": 9.940100680250086e-06, "loss": 0.5706, "step": 1222 }, { "epoch": 0.16, "grad_norm": 0.7666455691407895, "learning_rate": 9.939941375549559e-06, "loss": 0.611, "step": 1223 }, { "epoch": 0.16, "grad_norm": 0.8285991798299784, "learning_rate": 9.93978186057151e-06, "loss": 0.657, "step": 1224 }, { "epoch": 0.16, "grad_norm": 0.5513770793701552, "learning_rate": 9.939622135322733e-06, "loss": 0.5328, "step": 1225 }, { "epoch": 0.16, "grad_norm": 0.6724169105488185, "learning_rate": 9.939462199810027e-06, "loss": 0.5896, "step": 1226 }, { "epoch": 0.16, "grad_norm": 0.6217065032154994, "learning_rate": 9.9393020540402e-06, "loss": 0.535, "step": 1227 }, { "epoch": 0.16, "grad_norm": 0.714592607405014, "learning_rate": 9.939141698020067e-06, "loss": 0.5438, "step": 1228 }, { "epoch": 0.16, "grad_norm": 0.6270973608896668, "learning_rate": 9.938981131756455e-06, "loss": 0.5692, "step": 1229 }, { "epoch": 0.16, "grad_norm": 0.7516987047795822, "learning_rate": 9.938820355256201e-06, "loss": 0.6568, "step": 1230 }, { "epoch": 0.16, "grad_norm": 0.6853617324725388, "learning_rate": 9.938659368526146e-06, "loss": 0.5389, "step": 1231 }, { "epoch": 0.16, "grad_norm": 0.739056326079907, "learning_rate": 9.938498171573142e-06, "loss": 0.5849, "step": 1232 }, { "epoch": 0.16, "grad_norm": 0.8286906199401044, "learning_rate": 9.938336764404053e-06, "loss": 0.6549, "step": 1233 }, { "epoch": 0.16, "grad_norm": 0.6871217548760605, "learning_rate": 9.938175147025749e-06, "loss": 0.5441, "step": 1234 }, { "epoch": 0.16, "grad_norm": 0.635761385045518, "learning_rate": 9.938013319445107e-06, "loss": 0.5623, "step": 1235 }, { "epoch": 0.16, "grad_norm": 0.7732509643778649, "learning_rate": 9.93785128166902e-06, "loss": 0.6476, "step": 1236 }, { "epoch": 0.16, "grad_norm": 0.9388942019799706, "learning_rate": 9.937689033704383e-06, "loss": 0.6313, "step": 1237 }, { "epoch": 0.16, "grad_norm": 0.6506073910645148, "learning_rate": 9.937526575558102e-06, "loss": 0.5507, "step": 1238 }, { "epoch": 0.16, "grad_norm": 0.7109459417537408, "learning_rate": 9.937363907237093e-06, "loss": 0.5455, "step": 1239 }, { "epoch": 0.16, "grad_norm": 0.7131945189513951, "learning_rate": 9.937201028748278e-06, "loss": 0.5972, "step": 1240 }, { "epoch": 0.16, "grad_norm": 0.7933921050202154, "learning_rate": 9.937037940098595e-06, "loss": 0.6384, "step": 1241 }, { "epoch": 0.16, "grad_norm": 0.7117828298931219, "learning_rate": 9.936874641294982e-06, "loss": 0.5741, "step": 1242 }, { "epoch": 0.16, "grad_norm": 0.6858607363442373, "learning_rate": 9.936711132344393e-06, "loss": 0.5684, "step": 1243 }, { "epoch": 0.16, "grad_norm": 1.079827994215085, "learning_rate": 9.936547413253783e-06, "loss": 0.6917, "step": 1244 }, { "epoch": 0.16, "grad_norm": 0.8693329698611052, "learning_rate": 9.936383484030127e-06, "loss": 0.6281, "step": 1245 }, { "epoch": 0.16, "grad_norm": 0.9237381724365175, "learning_rate": 9.9362193446804e-06, "loss": 0.6816, "step": 1246 }, { "epoch": 0.16, "grad_norm": 0.5947942369877502, "learning_rate": 9.93605499521159e-06, "loss": 0.5281, "step": 1247 }, { "epoch": 0.16, "grad_norm": 0.6876185476635489, "learning_rate": 9.935890435630693e-06, "loss": 0.5819, "step": 1248 }, { "epoch": 0.16, "grad_norm": 0.7852566555702422, "learning_rate": 9.935725665944712e-06, "loss": 0.5835, "step": 1249 }, { "epoch": 0.16, "grad_norm": 0.8860897748537813, "learning_rate": 9.935560686160661e-06, "loss": 0.6229, "step": 1250 }, { "epoch": 0.16, "grad_norm": 0.8380467170046101, "learning_rate": 9.935395496285565e-06, "loss": 0.5754, "step": 1251 }, { "epoch": 0.16, "grad_norm": 0.8174419125773005, "learning_rate": 9.935230096326452e-06, "loss": 0.5831, "step": 1252 }, { "epoch": 0.16, "grad_norm": 0.6096272258583538, "learning_rate": 9.935064486290366e-06, "loss": 0.5627, "step": 1253 }, { "epoch": 0.16, "grad_norm": 0.8144193664929303, "learning_rate": 9.934898666184354e-06, "loss": 0.6342, "step": 1254 }, { "epoch": 0.16, "grad_norm": 0.6618055773716802, "learning_rate": 9.934732636015475e-06, "loss": 0.558, "step": 1255 }, { "epoch": 0.16, "grad_norm": 0.5670649211847698, "learning_rate": 9.934566395790798e-06, "loss": 0.5249, "step": 1256 }, { "epoch": 0.16, "grad_norm": 1.0116605711057045, "learning_rate": 9.934399945517398e-06, "loss": 0.7115, "step": 1257 }, { "epoch": 0.16, "grad_norm": 0.9606958544754229, "learning_rate": 9.934233285202362e-06, "loss": 0.6245, "step": 1258 }, { "epoch": 0.16, "grad_norm": 0.6005659704302857, "learning_rate": 9.93406641485278e-06, "loss": 0.5508, "step": 1259 }, { "epoch": 0.16, "grad_norm": 0.603908805834337, "learning_rate": 9.93389933447576e-06, "loss": 0.5268, "step": 1260 }, { "epoch": 0.16, "grad_norm": 0.6912683190406798, "learning_rate": 9.93373204407841e-06, "loss": 0.5603, "step": 1261 }, { "epoch": 0.16, "grad_norm": 1.5015099086663268, "learning_rate": 9.933564543667854e-06, "loss": 0.6758, "step": 1262 }, { "epoch": 0.16, "grad_norm": 0.61498877277781, "learning_rate": 9.933396833251221e-06, "loss": 0.5979, "step": 1263 }, { "epoch": 0.16, "grad_norm": 0.7587062308638808, "learning_rate": 9.933228912835649e-06, "loss": 0.535, "step": 1264 }, { "epoch": 0.16, "grad_norm": 0.6372135461611754, "learning_rate": 9.933060782428286e-06, "loss": 0.5916, "step": 1265 }, { "epoch": 0.16, "grad_norm": 0.8828201367862585, "learning_rate": 9.932892442036289e-06, "loss": 0.5955, "step": 1266 }, { "epoch": 0.16, "grad_norm": 0.8675700039740134, "learning_rate": 9.932723891666825e-06, "loss": 0.6343, "step": 1267 }, { "epoch": 0.16, "grad_norm": 1.000392034730579, "learning_rate": 9.932555131327069e-06, "loss": 0.5913, "step": 1268 }, { "epoch": 0.16, "grad_norm": 0.6441274395889746, "learning_rate": 9.9323861610242e-06, "loss": 0.5294, "step": 1269 }, { "epoch": 0.16, "grad_norm": 0.5634681746022322, "learning_rate": 9.932216980765416e-06, "loss": 0.5094, "step": 1270 }, { "epoch": 0.16, "grad_norm": 0.8087186209107541, "learning_rate": 9.932047590557916e-06, "loss": 0.6285, "step": 1271 }, { "epoch": 0.16, "grad_norm": 0.5830632100716865, "learning_rate": 9.93187799040891e-06, "loss": 0.5117, "step": 1272 }, { "epoch": 0.16, "grad_norm": 0.5993417802328452, "learning_rate": 9.931708180325619e-06, "loss": 0.5062, "step": 1273 }, { "epoch": 0.16, "grad_norm": 0.669294113898828, "learning_rate": 9.931538160315268e-06, "loss": 0.5355, "step": 1274 }, { "epoch": 0.16, "grad_norm": 0.60490174747613, "learning_rate": 9.931367930385098e-06, "loss": 0.5085, "step": 1275 }, { "epoch": 0.16, "grad_norm": 0.6262615291290174, "learning_rate": 9.931197490542354e-06, "loss": 0.5536, "step": 1276 }, { "epoch": 0.16, "grad_norm": 0.91396197589831, "learning_rate": 9.931026840794292e-06, "loss": 0.6301, "step": 1277 }, { "epoch": 0.16, "grad_norm": 1.0022672535619206, "learning_rate": 9.930855981148172e-06, "loss": 0.6549, "step": 1278 }, { "epoch": 0.16, "grad_norm": 0.9263003202148294, "learning_rate": 9.93068491161127e-06, "loss": 0.6036, "step": 1279 }, { "epoch": 0.16, "grad_norm": 0.7871313319089243, "learning_rate": 9.930513632190868e-06, "loss": 0.643, "step": 1280 }, { "epoch": 0.16, "grad_norm": 0.7385298884516197, "learning_rate": 9.930342142894259e-06, "loss": 0.544, "step": 1281 }, { "epoch": 0.16, "grad_norm": 0.9343110234348226, "learning_rate": 9.930170443728736e-06, "loss": 0.6865, "step": 1282 }, { "epoch": 0.16, "grad_norm": 1.778588128833451, "learning_rate": 9.929998534701612e-06, "loss": 0.6069, "step": 1283 }, { "epoch": 0.16, "grad_norm": 1.0524259183173499, "learning_rate": 9.929826415820207e-06, "loss": 0.6236, "step": 1284 }, { "epoch": 0.16, "grad_norm": 0.8147387322477797, "learning_rate": 9.929654087091845e-06, "loss": 0.5876, "step": 1285 }, { "epoch": 0.16, "grad_norm": 0.8626286053173774, "learning_rate": 9.92948154852386e-06, "loss": 0.6699, "step": 1286 }, { "epoch": 0.16, "grad_norm": 0.6848567231390712, "learning_rate": 9.929308800123597e-06, "loss": 0.5203, "step": 1287 }, { "epoch": 0.16, "grad_norm": 0.8106992091885179, "learning_rate": 9.929135841898412e-06, "loss": 0.6333, "step": 1288 }, { "epoch": 0.16, "grad_norm": 0.6355507244372836, "learning_rate": 9.928962673855664e-06, "loss": 0.5674, "step": 1289 }, { "epoch": 0.16, "grad_norm": 0.9575810776483066, "learning_rate": 9.928789296002726e-06, "loss": 0.5601, "step": 1290 }, { "epoch": 0.16, "grad_norm": 0.6614320788002571, "learning_rate": 9.928615708346978e-06, "loss": 0.5323, "step": 1291 }, { "epoch": 0.16, "grad_norm": 0.78603144821625, "learning_rate": 9.92844191089581e-06, "loss": 0.63, "step": 1292 }, { "epoch": 0.16, "grad_norm": 0.854145474325908, "learning_rate": 9.92826790365662e-06, "loss": 0.5931, "step": 1293 }, { "epoch": 0.16, "grad_norm": 0.9479017928633185, "learning_rate": 9.928093686636811e-06, "loss": 0.6366, "step": 1294 }, { "epoch": 0.16, "grad_norm": 0.6556789536313667, "learning_rate": 9.927919259843801e-06, "loss": 0.6144, "step": 1295 }, { "epoch": 0.17, "grad_norm": 0.7178000902835753, "learning_rate": 9.927744623285017e-06, "loss": 0.6428, "step": 1296 }, { "epoch": 0.17, "grad_norm": 0.9778528421679374, "learning_rate": 9.927569776967891e-06, "loss": 0.6789, "step": 1297 }, { "epoch": 0.17, "grad_norm": 0.699409897187686, "learning_rate": 9.927394720899866e-06, "loss": 0.5989, "step": 1298 }, { "epoch": 0.17, "grad_norm": 0.7475897085800394, "learning_rate": 9.927219455088394e-06, "loss": 0.5351, "step": 1299 }, { "epoch": 0.17, "grad_norm": 0.9380606806438925, "learning_rate": 9.927043979540934e-06, "loss": 0.6808, "step": 1300 }, { "epoch": 0.17, "grad_norm": 0.7623954190929098, "learning_rate": 9.926868294264957e-06, "loss": 0.6033, "step": 1301 }, { "epoch": 0.17, "grad_norm": 0.8451633671559466, "learning_rate": 9.92669239926794e-06, "loss": 0.6047, "step": 1302 }, { "epoch": 0.17, "grad_norm": 0.6344557359348485, "learning_rate": 9.926516294557374e-06, "loss": 0.4629, "step": 1303 }, { "epoch": 0.17, "grad_norm": 0.983505389962346, "learning_rate": 9.92633998014075e-06, "loss": 0.6401, "step": 1304 }, { "epoch": 0.17, "grad_norm": 0.8396107779558755, "learning_rate": 9.926163456025573e-06, "loss": 0.5932, "step": 1305 }, { "epoch": 0.17, "grad_norm": 0.6464848349421851, "learning_rate": 9.925986722219362e-06, "loss": 0.5482, "step": 1306 }, { "epoch": 0.17, "grad_norm": 0.6152424291581389, "learning_rate": 9.925809778729639e-06, "loss": 0.5177, "step": 1307 }, { "epoch": 0.17, "grad_norm": 0.8963702088469717, "learning_rate": 9.925632625563931e-06, "loss": 0.6215, "step": 1308 }, { "epoch": 0.17, "grad_norm": 0.8236919014857446, "learning_rate": 9.925455262729785e-06, "loss": 0.6344, "step": 1309 }, { "epoch": 0.17, "grad_norm": 0.8067235544500863, "learning_rate": 9.925277690234745e-06, "loss": 0.6568, "step": 1310 }, { "epoch": 0.17, "grad_norm": 0.8053307649416414, "learning_rate": 9.925099908086375e-06, "loss": 0.6914, "step": 1311 }, { "epoch": 0.17, "grad_norm": 0.8185646572828913, "learning_rate": 9.924921916292239e-06, "loss": 0.6134, "step": 1312 }, { "epoch": 0.17, "grad_norm": 0.8488052935040817, "learning_rate": 9.924743714859915e-06, "loss": 0.6114, "step": 1313 }, { "epoch": 0.17, "grad_norm": 0.786538209385159, "learning_rate": 9.924565303796988e-06, "loss": 0.6325, "step": 1314 }, { "epoch": 0.17, "grad_norm": 0.8421790302765904, "learning_rate": 9.924386683111055e-06, "loss": 0.6299, "step": 1315 }, { "epoch": 0.17, "grad_norm": 0.8593619423668151, "learning_rate": 9.924207852809715e-06, "loss": 0.5854, "step": 1316 }, { "epoch": 0.17, "grad_norm": 0.7794165596221101, "learning_rate": 9.924028812900582e-06, "loss": 0.6472, "step": 1317 }, { "epoch": 0.17, "grad_norm": 0.6200289391608015, "learning_rate": 9.923849563391278e-06, "loss": 0.5082, "step": 1318 }, { "epoch": 0.17, "grad_norm": 0.6503282906773031, "learning_rate": 9.923670104289433e-06, "loss": 0.5554, "step": 1319 }, { "epoch": 0.17, "grad_norm": 0.9334848680283766, "learning_rate": 9.923490435602685e-06, "loss": 0.6438, "step": 1320 }, { "epoch": 0.17, "grad_norm": 0.853719193099155, "learning_rate": 9.923310557338681e-06, "loss": 0.5958, "step": 1321 }, { "epoch": 0.17, "grad_norm": 0.6323711898369337, "learning_rate": 9.923130469505081e-06, "loss": 0.524, "step": 1322 }, { "epoch": 0.17, "grad_norm": 0.6266910281118175, "learning_rate": 9.922950172109549e-06, "loss": 0.5212, "step": 1323 }, { "epoch": 0.17, "grad_norm": 0.6949655797370283, "learning_rate": 9.922769665159759e-06, "loss": 0.5901, "step": 1324 }, { "epoch": 0.17, "grad_norm": 0.8504757543505016, "learning_rate": 9.922588948663395e-06, "loss": 0.6324, "step": 1325 }, { "epoch": 0.17, "grad_norm": 1.198837872795737, "learning_rate": 9.92240802262815e-06, "loss": 0.5773, "step": 1326 }, { "epoch": 0.17, "grad_norm": 0.7389168502790365, "learning_rate": 9.922226887061726e-06, "loss": 0.6019, "step": 1327 }, { "epoch": 0.17, "grad_norm": 0.9543392867703335, "learning_rate": 9.922045541971831e-06, "loss": 0.6718, "step": 1328 }, { "epoch": 0.17, "grad_norm": 0.6769447419107208, "learning_rate": 9.921863987366187e-06, "loss": 0.5451, "step": 1329 }, { "epoch": 0.17, "grad_norm": 0.7995115583222592, "learning_rate": 9.921682223252522e-06, "loss": 0.6461, "step": 1330 }, { "epoch": 0.17, "grad_norm": 0.5807904876349749, "learning_rate": 9.921500249638572e-06, "loss": 0.5647, "step": 1331 }, { "epoch": 0.17, "grad_norm": 0.7328444051173592, "learning_rate": 9.921318066532082e-06, "loss": 0.5567, "step": 1332 }, { "epoch": 0.17, "grad_norm": 0.6423699956731261, "learning_rate": 9.921135673940809e-06, "loss": 0.532, "step": 1333 }, { "epoch": 0.17, "grad_norm": 1.0732783771120218, "learning_rate": 9.920953071872514e-06, "loss": 0.6393, "step": 1334 }, { "epoch": 0.17, "grad_norm": 0.6072194907461469, "learning_rate": 9.920770260334974e-06, "loss": 0.5242, "step": 1335 }, { "epoch": 0.17, "grad_norm": 0.8678831232928724, "learning_rate": 9.920587239335968e-06, "loss": 0.6428, "step": 1336 }, { "epoch": 0.17, "grad_norm": 0.7322646257079967, "learning_rate": 9.920404008883287e-06, "loss": 0.582, "step": 1337 }, { "epoch": 0.17, "grad_norm": 0.6987065357589037, "learning_rate": 9.920220568984731e-06, "loss": 0.5263, "step": 1338 }, { "epoch": 0.17, "grad_norm": 0.894246119987906, "learning_rate": 9.920036919648108e-06, "loss": 0.6545, "step": 1339 }, { "epoch": 0.17, "grad_norm": 0.7271150938938604, "learning_rate": 9.919853060881238e-06, "loss": 0.6291, "step": 1340 }, { "epoch": 0.17, "grad_norm": 1.0716868696461448, "learning_rate": 9.919668992691941e-06, "loss": 0.6191, "step": 1341 }, { "epoch": 0.17, "grad_norm": 1.07794875912514, "learning_rate": 9.919484715088057e-06, "loss": 0.6533, "step": 1342 }, { "epoch": 0.17, "grad_norm": 0.7029692021347415, "learning_rate": 9.91930022807743e-06, "loss": 0.5706, "step": 1343 }, { "epoch": 0.17, "grad_norm": 0.5813230784025685, "learning_rate": 9.919115531667911e-06, "loss": 0.5593, "step": 1344 }, { "epoch": 0.17, "grad_norm": 0.605113227667247, "learning_rate": 9.918930625867364e-06, "loss": 0.5373, "step": 1345 }, { "epoch": 0.17, "grad_norm": 0.7225811940174828, "learning_rate": 9.918745510683659e-06, "loss": 0.6309, "step": 1346 }, { "epoch": 0.17, "grad_norm": 0.6053881124764692, "learning_rate": 9.918560186124674e-06, "loss": 0.5351, "step": 1347 }, { "epoch": 0.17, "grad_norm": 0.6818319116848462, "learning_rate": 9.918374652198302e-06, "loss": 0.5734, "step": 1348 }, { "epoch": 0.17, "grad_norm": 0.8958174067084802, "learning_rate": 9.918188908912436e-06, "loss": 0.6048, "step": 1349 }, { "epoch": 0.17, "grad_norm": 0.8187811101690751, "learning_rate": 9.918002956274986e-06, "loss": 0.6245, "step": 1350 }, { "epoch": 0.17, "grad_norm": 0.7363704720909162, "learning_rate": 9.917816794293864e-06, "loss": 0.5962, "step": 1351 }, { "epoch": 0.17, "grad_norm": 0.7539830444599103, "learning_rate": 9.917630422976997e-06, "loss": 0.632, "step": 1352 }, { "epoch": 0.17, "grad_norm": 0.800461503275176, "learning_rate": 9.917443842332318e-06, "loss": 0.6383, "step": 1353 }, { "epoch": 0.17, "grad_norm": 1.5859864768068026, "learning_rate": 9.917257052367768e-06, "loss": 0.5852, "step": 1354 }, { "epoch": 0.17, "grad_norm": 0.7748718361879976, "learning_rate": 9.917070053091298e-06, "loss": 0.5468, "step": 1355 }, { "epoch": 0.17, "grad_norm": 0.7480038777585786, "learning_rate": 9.916882844510868e-06, "loss": 0.6377, "step": 1356 }, { "epoch": 0.17, "grad_norm": 0.5950072074474939, "learning_rate": 9.91669542663445e-06, "loss": 0.5946, "step": 1357 }, { "epoch": 0.17, "grad_norm": 0.7547818256118963, "learning_rate": 9.916507799470016e-06, "loss": 0.5945, "step": 1358 }, { "epoch": 0.17, "grad_norm": 0.6190436223612008, "learning_rate": 9.916319963025558e-06, "loss": 0.588, "step": 1359 }, { "epoch": 0.17, "grad_norm": 0.6289210240530831, "learning_rate": 9.91613191730907e-06, "loss": 0.5692, "step": 1360 }, { "epoch": 0.17, "grad_norm": 0.6210779960478716, "learning_rate": 9.915943662328555e-06, "loss": 0.5621, "step": 1361 }, { "epoch": 0.17, "grad_norm": 1.0351385027493207, "learning_rate": 9.915755198092027e-06, "loss": 0.6741, "step": 1362 }, { "epoch": 0.17, "grad_norm": 1.0988988261684944, "learning_rate": 9.915566524607509e-06, "loss": 0.5733, "step": 1363 }, { "epoch": 0.17, "grad_norm": 0.8624232098103166, "learning_rate": 9.915377641883033e-06, "loss": 0.6935, "step": 1364 }, { "epoch": 0.17, "grad_norm": 0.9210306619681377, "learning_rate": 9.915188549926637e-06, "loss": 0.6424, "step": 1365 }, { "epoch": 0.17, "grad_norm": 0.9948571038404521, "learning_rate": 9.914999248746371e-06, "loss": 0.6696, "step": 1366 }, { "epoch": 0.17, "grad_norm": 3.2880528637544018, "learning_rate": 9.914809738350294e-06, "loss": 0.6204, "step": 1367 }, { "epoch": 0.17, "grad_norm": 0.6282329190313225, "learning_rate": 9.914620018746472e-06, "loss": 0.5488, "step": 1368 }, { "epoch": 0.17, "grad_norm": 0.8313993414409726, "learning_rate": 9.91443008994298e-06, "loss": 0.6667, "step": 1369 }, { "epoch": 0.17, "grad_norm": 1.1268742468420139, "learning_rate": 9.914239951947902e-06, "loss": 0.7016, "step": 1370 }, { "epoch": 0.17, "grad_norm": 0.7616004118200012, "learning_rate": 9.914049604769335e-06, "loss": 0.6171, "step": 1371 }, { "epoch": 0.17, "grad_norm": 0.6443959873384107, "learning_rate": 9.913859048415378e-06, "loss": 0.5593, "step": 1372 }, { "epoch": 0.17, "grad_norm": 0.8507864958821436, "learning_rate": 9.913668282894144e-06, "loss": 0.6136, "step": 1373 }, { "epoch": 0.18, "grad_norm": 0.7798333503909397, "learning_rate": 9.913477308213754e-06, "loss": 0.6367, "step": 1374 }, { "epoch": 0.18, "grad_norm": 0.8440171935341105, "learning_rate": 9.913286124382335e-06, "loss": 0.6558, "step": 1375 }, { "epoch": 0.18, "grad_norm": 0.7562020603164891, "learning_rate": 9.913094731408029e-06, "loss": 0.6334, "step": 1376 }, { "epoch": 0.18, "grad_norm": 0.7084724801967266, "learning_rate": 9.912903129298977e-06, "loss": 0.5323, "step": 1377 }, { "epoch": 0.18, "grad_norm": 0.6243371740891822, "learning_rate": 9.91271131806334e-06, "loss": 0.5569, "step": 1378 }, { "epoch": 0.18, "grad_norm": 0.5874709344697451, "learning_rate": 9.91251929770928e-06, "loss": 0.539, "step": 1379 }, { "epoch": 0.18, "grad_norm": 1.221005406389, "learning_rate": 9.912327068244972e-06, "loss": 0.6443, "step": 1380 }, { "epoch": 0.18, "grad_norm": 0.6991482269482668, "learning_rate": 9.912134629678598e-06, "loss": 0.5285, "step": 1381 }, { "epoch": 0.18, "grad_norm": 0.7167757674295981, "learning_rate": 9.91194198201835e-06, "loss": 0.5978, "step": 1382 }, { "epoch": 0.18, "grad_norm": 0.5761426658547066, "learning_rate": 9.911749125272428e-06, "loss": 0.5181, "step": 1383 }, { "epoch": 0.18, "grad_norm": 0.8663088232605922, "learning_rate": 9.911556059449043e-06, "loss": 0.6388, "step": 1384 }, { "epoch": 0.18, "grad_norm": 1.49120814305152, "learning_rate": 9.91136278455641e-06, "loss": 0.6325, "step": 1385 }, { "epoch": 0.18, "grad_norm": 0.623522604963163, "learning_rate": 9.911169300602758e-06, "loss": 0.5337, "step": 1386 }, { "epoch": 0.18, "grad_norm": 0.7734456232414336, "learning_rate": 9.910975607596322e-06, "loss": 0.5485, "step": 1387 }, { "epoch": 0.18, "grad_norm": 0.5857910899105334, "learning_rate": 9.91078170554535e-06, "loss": 0.4878, "step": 1388 }, { "epoch": 0.18, "grad_norm": 1.089985469595313, "learning_rate": 9.910587594458089e-06, "loss": 0.6836, "step": 1389 }, { "epoch": 0.18, "grad_norm": 0.8096160681242981, "learning_rate": 9.910393274342811e-06, "loss": 0.613, "step": 1390 }, { "epoch": 0.18, "grad_norm": 0.9494275055958991, "learning_rate": 9.91019874520778e-06, "loss": 0.6892, "step": 1391 }, { "epoch": 0.18, "grad_norm": 0.8545086755605236, "learning_rate": 9.910004007061281e-06, "loss": 0.5985, "step": 1392 }, { "epoch": 0.18, "grad_norm": 0.8613985857742912, "learning_rate": 9.9098090599116e-06, "loss": 0.6026, "step": 1393 }, { "epoch": 0.18, "grad_norm": 0.6690755088707274, "learning_rate": 9.909613903767038e-06, "loss": 0.5481, "step": 1394 }, { "epoch": 0.18, "grad_norm": 0.649320401916616, "learning_rate": 9.909418538635898e-06, "loss": 0.5486, "step": 1395 }, { "epoch": 0.18, "grad_norm": 0.8956892661343855, "learning_rate": 9.909222964526502e-06, "loss": 0.6372, "step": 1396 }, { "epoch": 0.18, "grad_norm": 0.6206099989106391, "learning_rate": 9.909027181447171e-06, "loss": 0.5523, "step": 1397 }, { "epoch": 0.18, "grad_norm": 0.6666549049604876, "learning_rate": 9.90883118940624e-06, "loss": 0.4955, "step": 1398 }, { "epoch": 0.18, "grad_norm": 0.9338525361206736, "learning_rate": 9.908634988412052e-06, "loss": 0.7067, "step": 1399 }, { "epoch": 0.18, "grad_norm": 0.5636396475962866, "learning_rate": 9.908438578472958e-06, "loss": 0.502, "step": 1400 }, { "epoch": 0.18, "grad_norm": 0.9191880855606592, "learning_rate": 9.908241959597317e-06, "loss": 0.6488, "step": 1401 }, { "epoch": 0.18, "grad_norm": 0.6686319641226389, "learning_rate": 9.908045131793504e-06, "loss": 0.533, "step": 1402 }, { "epoch": 0.18, "grad_norm": 0.7011337591826701, "learning_rate": 9.907848095069892e-06, "loss": 0.5539, "step": 1403 }, { "epoch": 0.18, "grad_norm": 0.6767613065949596, "learning_rate": 9.907650849434868e-06, "loss": 0.6041, "step": 1404 }, { "epoch": 0.18, "grad_norm": 0.832178458992157, "learning_rate": 9.907453394896834e-06, "loss": 0.6848, "step": 1405 }, { "epoch": 0.18, "grad_norm": 0.7569351652390657, "learning_rate": 9.907255731464186e-06, "loss": 0.5541, "step": 1406 }, { "epoch": 0.18, "grad_norm": 0.6697095708357677, "learning_rate": 9.907057859145347e-06, "loss": 0.4926, "step": 1407 }, { "epoch": 0.18, "grad_norm": 0.6832580071791883, "learning_rate": 9.906859777948735e-06, "loss": 0.5754, "step": 1408 }, { "epoch": 0.18, "grad_norm": 0.8892841362498346, "learning_rate": 9.906661487882781e-06, "loss": 0.6543, "step": 1409 }, { "epoch": 0.18, "grad_norm": 0.6172957653929051, "learning_rate": 9.906462988955927e-06, "loss": 0.5229, "step": 1410 }, { "epoch": 0.18, "grad_norm": 0.6165829961599874, "learning_rate": 9.90626428117662e-06, "loss": 0.5249, "step": 1411 }, { "epoch": 0.18, "grad_norm": 0.9504490083975338, "learning_rate": 9.906065364553325e-06, "loss": 0.6192, "step": 1412 }, { "epoch": 0.18, "grad_norm": 1.1873244845545143, "learning_rate": 9.905866239094504e-06, "loss": 0.6932, "step": 1413 }, { "epoch": 0.18, "grad_norm": 8.06923183666248, "learning_rate": 9.905666904808634e-06, "loss": 0.6468, "step": 1414 }, { "epoch": 0.18, "grad_norm": 0.6611026336952993, "learning_rate": 9.905467361704197e-06, "loss": 0.5087, "step": 1415 }, { "epoch": 0.18, "grad_norm": 0.8563940941522039, "learning_rate": 9.905267609789694e-06, "loss": 0.6809, "step": 1416 }, { "epoch": 0.18, "grad_norm": 0.7925201345931134, "learning_rate": 9.905067649073623e-06, "loss": 0.577, "step": 1417 }, { "epoch": 0.18, "grad_norm": 0.7296902809381323, "learning_rate": 9.904867479564495e-06, "loss": 0.5895, "step": 1418 }, { "epoch": 0.18, "grad_norm": 0.7197783778450046, "learning_rate": 9.904667101270832e-06, "loss": 0.526, "step": 1419 }, { "epoch": 0.18, "grad_norm": 0.9967702771570577, "learning_rate": 9.904466514201166e-06, "loss": 0.6622, "step": 1420 }, { "epoch": 0.18, "grad_norm": 0.8584232773378019, "learning_rate": 9.904265718364032e-06, "loss": 0.5902, "step": 1421 }, { "epoch": 0.18, "grad_norm": 1.0000577762329252, "learning_rate": 9.904064713767978e-06, "loss": 0.6025, "step": 1422 }, { "epoch": 0.18, "grad_norm": 0.6847807358568969, "learning_rate": 9.90386350042156e-06, "loss": 0.5539, "step": 1423 }, { "epoch": 0.18, "grad_norm": 1.0644584919556448, "learning_rate": 9.903662078333342e-06, "loss": 0.6682, "step": 1424 }, { "epoch": 0.18, "grad_norm": 0.5823183939248158, "learning_rate": 9.9034604475119e-06, "loss": 0.5162, "step": 1425 }, { "epoch": 0.18, "grad_norm": 0.9679891295378018, "learning_rate": 9.903258607965818e-06, "loss": 0.6741, "step": 1426 }, { "epoch": 0.18, "grad_norm": 0.6281644095149793, "learning_rate": 9.903056559703683e-06, "loss": 0.5355, "step": 1427 }, { "epoch": 0.18, "grad_norm": 0.9660733377233732, "learning_rate": 9.902854302734099e-06, "loss": 0.6062, "step": 1428 }, { "epoch": 0.18, "grad_norm": 1.5278258123370565, "learning_rate": 9.902651837065675e-06, "loss": 0.6827, "step": 1429 }, { "epoch": 0.18, "grad_norm": 0.8966393076686645, "learning_rate": 9.902449162707029e-06, "loss": 0.6557, "step": 1430 }, { "epoch": 0.18, "grad_norm": 0.8878906994413838, "learning_rate": 9.90224627966679e-06, "loss": 0.6704, "step": 1431 }, { "epoch": 0.18, "grad_norm": 1.1623132432739118, "learning_rate": 9.902043187953589e-06, "loss": 0.6952, "step": 1432 }, { "epoch": 0.18, "grad_norm": 0.9283657657374793, "learning_rate": 9.901839887576075e-06, "loss": 0.6659, "step": 1433 }, { "epoch": 0.18, "grad_norm": 0.9289844103113588, "learning_rate": 9.901636378542902e-06, "loss": 0.6226, "step": 1434 }, { "epoch": 0.18, "grad_norm": 0.7697613872955419, "learning_rate": 9.901432660862731e-06, "loss": 0.5677, "step": 1435 }, { "epoch": 0.18, "grad_norm": 0.8329828227111222, "learning_rate": 9.901228734544235e-06, "loss": 0.586, "step": 1436 }, { "epoch": 0.18, "grad_norm": 0.6647831985745791, "learning_rate": 9.901024599596092e-06, "loss": 0.5618, "step": 1437 }, { "epoch": 0.18, "grad_norm": 0.9703069379973013, "learning_rate": 9.900820256026996e-06, "loss": 0.6259, "step": 1438 }, { "epoch": 0.18, "grad_norm": 0.7257311929494387, "learning_rate": 9.90061570384564e-06, "loss": 0.5675, "step": 1439 }, { "epoch": 0.18, "grad_norm": 0.6434160744348719, "learning_rate": 9.900410943060734e-06, "loss": 0.5133, "step": 1440 }, { "epoch": 0.18, "grad_norm": 0.567375826023123, "learning_rate": 9.900205973680996e-06, "loss": 0.5638, "step": 1441 }, { "epoch": 0.18, "grad_norm": 1.1463821681631006, "learning_rate": 9.900000795715146e-06, "loss": 0.6737, "step": 1442 }, { "epoch": 0.18, "grad_norm": 0.7329192627325487, "learning_rate": 9.899795409171923e-06, "loss": 0.5904, "step": 1443 }, { "epoch": 0.18, "grad_norm": 0.8684228605169513, "learning_rate": 9.899589814060063e-06, "loss": 0.627, "step": 1444 }, { "epoch": 0.18, "grad_norm": 0.5880745445069139, "learning_rate": 9.899384010388324e-06, "loss": 0.5317, "step": 1445 }, { "epoch": 0.18, "grad_norm": 0.6981588853474568, "learning_rate": 9.899177998165464e-06, "loss": 0.5199, "step": 1446 }, { "epoch": 0.18, "grad_norm": 0.9437908832128978, "learning_rate": 9.898971777400251e-06, "loss": 0.6431, "step": 1447 }, { "epoch": 0.18, "grad_norm": 1.0875796335565775, "learning_rate": 9.898765348101464e-06, "loss": 0.6638, "step": 1448 }, { "epoch": 0.18, "grad_norm": 0.8013969374803733, "learning_rate": 9.898558710277893e-06, "loss": 0.6208, "step": 1449 }, { "epoch": 0.18, "grad_norm": 0.827640750552822, "learning_rate": 9.898351863938328e-06, "loss": 0.6842, "step": 1450 }, { "epoch": 0.18, "grad_norm": 0.8845593547754813, "learning_rate": 9.898144809091578e-06, "loss": 0.6881, "step": 1451 }, { "epoch": 0.18, "grad_norm": 0.6339290565768775, "learning_rate": 9.897937545746457e-06, "loss": 0.5589, "step": 1452 }, { "epoch": 0.19, "grad_norm": 0.63683296212066, "learning_rate": 9.897730073911785e-06, "loss": 0.5677, "step": 1453 }, { "epoch": 0.19, "grad_norm": 0.8037689871613029, "learning_rate": 9.897522393596395e-06, "loss": 0.6354, "step": 1454 }, { "epoch": 0.19, "grad_norm": 0.7750823129055913, "learning_rate": 9.897314504809128e-06, "loss": 0.5568, "step": 1455 }, { "epoch": 0.19, "grad_norm": 0.6269252556078115, "learning_rate": 9.89710640755883e-06, "loss": 0.5016, "step": 1456 }, { "epoch": 0.19, "grad_norm": 0.5762192182401308, "learning_rate": 9.896898101854363e-06, "loss": 0.5174, "step": 1457 }, { "epoch": 0.19, "grad_norm": 1.5017342628673056, "learning_rate": 9.896689587704591e-06, "loss": 0.6123, "step": 1458 }, { "epoch": 0.19, "grad_norm": 0.7616051248698723, "learning_rate": 9.896480865118393e-06, "loss": 0.5661, "step": 1459 }, { "epoch": 0.19, "grad_norm": 0.6299426594954421, "learning_rate": 9.896271934104649e-06, "loss": 0.537, "step": 1460 }, { "epoch": 0.19, "grad_norm": 0.6590310849058559, "learning_rate": 9.896062794672255e-06, "loss": 0.5836, "step": 1461 }, { "epoch": 0.19, "grad_norm": 0.6677798165719644, "learning_rate": 9.895853446830115e-06, "loss": 0.6128, "step": 1462 }, { "epoch": 0.19, "grad_norm": 1.7311823577825591, "learning_rate": 9.895643890587137e-06, "loss": 0.6706, "step": 1463 }, { "epoch": 0.19, "grad_norm": 0.5735339436796842, "learning_rate": 9.895434125952244e-06, "loss": 0.552, "step": 1464 }, { "epoch": 0.19, "grad_norm": 0.6007134379056589, "learning_rate": 9.895224152934362e-06, "loss": 0.5615, "step": 1465 }, { "epoch": 0.19, "grad_norm": 0.792092039855217, "learning_rate": 9.895013971542433e-06, "loss": 0.592, "step": 1466 }, { "epoch": 0.19, "grad_norm": 0.9023231896386574, "learning_rate": 9.8948035817854e-06, "loss": 0.6443, "step": 1467 }, { "epoch": 0.19, "grad_norm": 0.8971879997141177, "learning_rate": 9.894592983672223e-06, "loss": 0.6252, "step": 1468 }, { "epoch": 0.19, "grad_norm": 0.6786774174016315, "learning_rate": 9.89438217721186e-06, "loss": 0.5836, "step": 1469 }, { "epoch": 0.19, "grad_norm": 0.7149454501134153, "learning_rate": 9.894171162413289e-06, "loss": 0.5909, "step": 1470 }, { "epoch": 0.19, "grad_norm": 0.6392940942523266, "learning_rate": 9.893959939285491e-06, "loss": 0.5693, "step": 1471 }, { "epoch": 0.19, "grad_norm": 0.8879975013720354, "learning_rate": 9.893748507837458e-06, "loss": 0.6327, "step": 1472 }, { "epoch": 0.19, "grad_norm": 0.8588077828201054, "learning_rate": 9.893536868078188e-06, "loss": 0.6957, "step": 1473 }, { "epoch": 0.19, "grad_norm": 0.9244754258118437, "learning_rate": 9.893325020016692e-06, "loss": 0.6857, "step": 1474 }, { "epoch": 0.19, "grad_norm": 1.0370381276392335, "learning_rate": 9.893112963661986e-06, "loss": 0.5826, "step": 1475 }, { "epoch": 0.19, "grad_norm": 0.7727905979684659, "learning_rate": 9.892900699023098e-06, "loss": 0.6026, "step": 1476 }, { "epoch": 0.19, "grad_norm": 0.6070696744331471, "learning_rate": 9.892688226109064e-06, "loss": 0.4797, "step": 1477 }, { "epoch": 0.19, "grad_norm": 0.7962443280674872, "learning_rate": 9.892475544928925e-06, "loss": 0.5596, "step": 1478 }, { "epoch": 0.19, "grad_norm": 0.8647045792132472, "learning_rate": 9.892262655491736e-06, "loss": 0.6613, "step": 1479 }, { "epoch": 0.19, "grad_norm": 0.7776722259973866, "learning_rate": 9.89204955780656e-06, "loss": 0.6021, "step": 1480 }, { "epoch": 0.19, "grad_norm": 0.6041916236982071, "learning_rate": 9.891836251882468e-06, "loss": 0.4868, "step": 1481 }, { "epoch": 0.19, "grad_norm": 0.6806411366767511, "learning_rate": 9.891622737728537e-06, "loss": 0.5788, "step": 1482 }, { "epoch": 0.19, "grad_norm": 0.7959127973674263, "learning_rate": 9.891409015353859e-06, "loss": 0.6046, "step": 1483 }, { "epoch": 0.19, "grad_norm": 0.8228697951855963, "learning_rate": 9.89119508476753e-06, "loss": 0.5781, "step": 1484 }, { "epoch": 0.19, "grad_norm": 0.7064391469084955, "learning_rate": 9.890980945978655e-06, "loss": 0.5517, "step": 1485 }, { "epoch": 0.19, "grad_norm": 0.9005920425585042, "learning_rate": 9.89076659899635e-06, "loss": 0.6435, "step": 1486 }, { "epoch": 0.19, "grad_norm": 0.7604358015849504, "learning_rate": 9.89055204382974e-06, "loss": 0.591, "step": 1487 }, { "epoch": 0.19, "grad_norm": 0.684928912636846, "learning_rate": 9.890337280487958e-06, "loss": 0.5108, "step": 1488 }, { "epoch": 0.19, "grad_norm": 0.8129477359373968, "learning_rate": 9.890122308980145e-06, "loss": 0.6252, "step": 1489 }, { "epoch": 0.19, "grad_norm": 0.8150066125639978, "learning_rate": 9.889907129315452e-06, "loss": 0.6235, "step": 1490 }, { "epoch": 0.19, "grad_norm": 0.902468709050217, "learning_rate": 9.889691741503038e-06, "loss": 0.6093, "step": 1491 }, { "epoch": 0.19, "grad_norm": 0.922199679349637, "learning_rate": 9.889476145552073e-06, "loss": 0.687, "step": 1492 }, { "epoch": 0.19, "grad_norm": 0.7101187977843562, "learning_rate": 9.889260341471732e-06, "loss": 0.5222, "step": 1493 }, { "epoch": 0.19, "grad_norm": 0.6516247339669807, "learning_rate": 9.889044329271202e-06, "loss": 0.6044, "step": 1494 }, { "epoch": 0.19, "grad_norm": 0.6617508541540921, "learning_rate": 9.888828108959678e-06, "loss": 0.5943, "step": 1495 }, { "epoch": 0.19, "grad_norm": 0.6547760173649779, "learning_rate": 9.888611680546366e-06, "loss": 0.5926, "step": 1496 }, { "epoch": 0.19, "grad_norm": 0.8146903408494094, "learning_rate": 9.888395044040475e-06, "loss": 0.6898, "step": 1497 }, { "epoch": 0.19, "grad_norm": 0.9730046667110773, "learning_rate": 9.888178199451227e-06, "loss": 0.6485, "step": 1498 }, { "epoch": 0.19, "grad_norm": 0.5776389634461832, "learning_rate": 9.887961146787854e-06, "loss": 0.5667, "step": 1499 }, { "epoch": 0.19, "grad_norm": 0.9170097770006553, "learning_rate": 9.887743886059595e-06, "loss": 0.6274, "step": 1500 }, { "epoch": 0.19, "grad_norm": 0.675811892876071, "learning_rate": 9.887526417275698e-06, "loss": 0.5421, "step": 1501 }, { "epoch": 0.19, "grad_norm": 0.8182205446623694, "learning_rate": 9.88730874044542e-06, "loss": 0.6506, "step": 1502 }, { "epoch": 0.19, "grad_norm": 0.688221644284625, "learning_rate": 9.887090855578026e-06, "loss": 0.5645, "step": 1503 }, { "epoch": 0.19, "grad_norm": 0.7839834339247176, "learning_rate": 9.886872762682791e-06, "loss": 0.6135, "step": 1504 }, { "epoch": 0.19, "grad_norm": 0.7414209322009344, "learning_rate": 9.886654461768999e-06, "loss": 0.5907, "step": 1505 }, { "epoch": 0.19, "grad_norm": 0.7005784343852325, "learning_rate": 9.886435952845941e-06, "loss": 0.5969, "step": 1506 }, { "epoch": 0.19, "grad_norm": 0.9034003306411155, "learning_rate": 9.886217235922921e-06, "loss": 0.6325, "step": 1507 }, { "epoch": 0.19, "grad_norm": 0.6488541912557233, "learning_rate": 9.885998311009246e-06, "loss": 0.5933, "step": 1508 }, { "epoch": 0.19, "grad_norm": 0.5939210241457038, "learning_rate": 9.885779178114236e-06, "loss": 0.5055, "step": 1509 }, { "epoch": 0.19, "grad_norm": 0.7107377608775156, "learning_rate": 9.88555983724722e-06, "loss": 0.5928, "step": 1510 }, { "epoch": 0.19, "grad_norm": 1.1112842519856718, "learning_rate": 9.885340288417535e-06, "loss": 0.7156, "step": 1511 }, { "epoch": 0.19, "grad_norm": 0.6305165668223054, "learning_rate": 9.885120531634525e-06, "loss": 0.614, "step": 1512 }, { "epoch": 0.19, "grad_norm": 0.7158541089272958, "learning_rate": 9.884900566907543e-06, "loss": 0.5829, "step": 1513 }, { "epoch": 0.19, "grad_norm": 0.9801514047422134, "learning_rate": 9.884680394245953e-06, "loss": 0.6375, "step": 1514 }, { "epoch": 0.19, "grad_norm": 0.8273363649517759, "learning_rate": 9.88446001365913e-06, "loss": 0.6249, "step": 1515 }, { "epoch": 0.19, "grad_norm": 0.7454375934919218, "learning_rate": 9.884239425156452e-06, "loss": 0.6193, "step": 1516 }, { "epoch": 0.19, "grad_norm": 0.7092783240013331, "learning_rate": 9.88401862874731e-06, "loss": 0.5809, "step": 1517 }, { "epoch": 0.19, "grad_norm": 0.6810210577340193, "learning_rate": 9.8837976244411e-06, "loss": 0.5678, "step": 1518 }, { "epoch": 0.19, "grad_norm": 0.6809892291975016, "learning_rate": 9.883576412247233e-06, "loss": 0.5781, "step": 1519 }, { "epoch": 0.19, "grad_norm": 0.8503934579940726, "learning_rate": 9.883354992175124e-06, "loss": 0.6795, "step": 1520 }, { "epoch": 0.19, "grad_norm": 0.9274211197639796, "learning_rate": 9.883133364234198e-06, "loss": 0.7125, "step": 1521 }, { "epoch": 0.19, "grad_norm": 0.9042072813027953, "learning_rate": 9.88291152843389e-06, "loss": 0.7166, "step": 1522 }, { "epoch": 0.19, "grad_norm": 0.6546267571881859, "learning_rate": 9.88268948478364e-06, "loss": 0.5698, "step": 1523 }, { "epoch": 0.19, "grad_norm": 0.6648143273555109, "learning_rate": 9.882467233292901e-06, "loss": 0.6095, "step": 1524 }, { "epoch": 0.19, "grad_norm": 0.669896902749318, "learning_rate": 9.882244773971137e-06, "loss": 0.6117, "step": 1525 }, { "epoch": 0.19, "grad_norm": 0.669379190520074, "learning_rate": 9.88202210682781e-06, "loss": 0.5603, "step": 1526 }, { "epoch": 0.19, "grad_norm": 0.6705263480036763, "learning_rate": 9.881799231872406e-06, "loss": 0.5099, "step": 1527 }, { "epoch": 0.19, "grad_norm": 0.5993458923502961, "learning_rate": 9.881576149114407e-06, "loss": 0.509, "step": 1528 }, { "epoch": 0.19, "grad_norm": 2.0238276485462094, "learning_rate": 9.88135285856331e-06, "loss": 0.5775, "step": 1529 }, { "epoch": 0.19, "grad_norm": 0.8936331822670118, "learning_rate": 9.881129360228623e-06, "loss": 0.6531, "step": 1530 }, { "epoch": 0.2, "grad_norm": 0.9032830182022972, "learning_rate": 9.880905654119856e-06, "loss": 0.5964, "step": 1531 }, { "epoch": 0.2, "grad_norm": 0.8753903201065488, "learning_rate": 9.880681740246531e-06, "loss": 0.6343, "step": 1532 }, { "epoch": 0.2, "grad_norm": 0.7976664177553304, "learning_rate": 9.880457618618184e-06, "loss": 0.558, "step": 1533 }, { "epoch": 0.2, "grad_norm": 0.769384373440501, "learning_rate": 9.88023328924435e-06, "loss": 0.5724, "step": 1534 }, { "epoch": 0.2, "grad_norm": 0.708971831517012, "learning_rate": 9.880008752134578e-06, "loss": 0.5605, "step": 1535 }, { "epoch": 0.2, "grad_norm": 0.6838771857352796, "learning_rate": 9.87978400729843e-06, "loss": 0.5889, "step": 1536 }, { "epoch": 0.2, "grad_norm": 0.7179981698375474, "learning_rate": 9.87955905474547e-06, "loss": 0.6386, "step": 1537 }, { "epoch": 0.2, "grad_norm": 0.7286557985737631, "learning_rate": 9.879333894485272e-06, "loss": 0.5959, "step": 1538 }, { "epoch": 0.2, "grad_norm": 0.6417993377828476, "learning_rate": 9.879108526527425e-06, "loss": 0.5761, "step": 1539 }, { "epoch": 0.2, "grad_norm": 0.6455887264965151, "learning_rate": 9.878882950881518e-06, "loss": 0.5701, "step": 1540 }, { "epoch": 0.2, "grad_norm": 0.9392531020535543, "learning_rate": 9.878657167557156e-06, "loss": 0.6737, "step": 1541 }, { "epoch": 0.2, "grad_norm": 0.8052631006205957, "learning_rate": 9.878431176563945e-06, "loss": 0.6034, "step": 1542 }, { "epoch": 0.2, "grad_norm": 0.6452744052517272, "learning_rate": 9.87820497791151e-06, "loss": 0.5617, "step": 1543 }, { "epoch": 0.2, "grad_norm": 0.8888982388016089, "learning_rate": 9.877978571609479e-06, "loss": 0.6541, "step": 1544 }, { "epoch": 0.2, "grad_norm": 0.8622303599846088, "learning_rate": 9.877751957667486e-06, "loss": 0.6467, "step": 1545 }, { "epoch": 0.2, "grad_norm": 0.760571615055131, "learning_rate": 9.87752513609518e-06, "loss": 0.615, "step": 1546 }, { "epoch": 0.2, "grad_norm": 0.6745613351888998, "learning_rate": 9.877298106902216e-06, "loss": 0.5902, "step": 1547 }, { "epoch": 0.2, "grad_norm": 0.6051554949691383, "learning_rate": 9.877070870098256e-06, "loss": 0.5162, "step": 1548 }, { "epoch": 0.2, "grad_norm": 0.6335179298694252, "learning_rate": 9.876843425692975e-06, "loss": 0.5526, "step": 1549 }, { "epoch": 0.2, "grad_norm": 0.8044564719001647, "learning_rate": 9.876615773696053e-06, "loss": 0.6415, "step": 1550 }, { "epoch": 0.2, "grad_norm": 1.2969363633187778, "learning_rate": 9.876387914117181e-06, "loss": 0.6653, "step": 1551 }, { "epoch": 0.2, "grad_norm": 0.8054796427768358, "learning_rate": 9.876159846966057e-06, "loss": 0.5273, "step": 1552 }, { "epoch": 0.2, "grad_norm": 0.9842086078093535, "learning_rate": 9.87593157225239e-06, "loss": 0.5805, "step": 1553 }, { "epoch": 0.2, "grad_norm": 0.6891899317152046, "learning_rate": 9.8757030899859e-06, "loss": 0.5981, "step": 1554 }, { "epoch": 0.2, "grad_norm": 1.3612018114252962, "learning_rate": 9.875474400176307e-06, "loss": 0.6092, "step": 1555 }, { "epoch": 0.2, "grad_norm": 0.6543286888948006, "learning_rate": 9.87524550283335e-06, "loss": 0.5868, "step": 1556 }, { "epoch": 0.2, "grad_norm": 0.769976874425928, "learning_rate": 9.875016397966772e-06, "loss": 0.6744, "step": 1557 }, { "epoch": 0.2, "grad_norm": 0.585257672773087, "learning_rate": 9.874787085586323e-06, "loss": 0.6127, "step": 1558 }, { "epoch": 0.2, "grad_norm": 0.5717036765553271, "learning_rate": 9.874557565701766e-06, "loss": 0.5382, "step": 1559 }, { "epoch": 0.2, "grad_norm": 0.6541689594121229, "learning_rate": 9.87432783832287e-06, "loss": 0.5523, "step": 1560 }, { "epoch": 0.2, "grad_norm": 0.8055566439494736, "learning_rate": 9.874097903459413e-06, "loss": 0.6251, "step": 1561 }, { "epoch": 0.2, "grad_norm": 0.8346090143319688, "learning_rate": 9.873867761121186e-06, "loss": 0.6496, "step": 1562 }, { "epoch": 0.2, "grad_norm": 0.670308442840768, "learning_rate": 9.873637411317982e-06, "loss": 0.5613, "step": 1563 }, { "epoch": 0.2, "grad_norm": 0.6667886214579567, "learning_rate": 9.873406854059606e-06, "loss": 0.5542, "step": 1564 }, { "epoch": 0.2, "grad_norm": 0.6787516379371499, "learning_rate": 9.873176089355876e-06, "loss": 0.5624, "step": 1565 }, { "epoch": 0.2, "grad_norm": 0.7509925927710046, "learning_rate": 9.87294511721661e-06, "loss": 0.5894, "step": 1566 }, { "epoch": 0.2, "grad_norm": 0.9022552804982648, "learning_rate": 9.872713937651644e-06, "loss": 0.6462, "step": 1567 }, { "epoch": 0.2, "grad_norm": 0.6648974660353939, "learning_rate": 9.872482550670815e-06, "loss": 0.6236, "step": 1568 }, { "epoch": 0.2, "grad_norm": 0.9422739387844301, "learning_rate": 9.872250956283974e-06, "loss": 0.595, "step": 1569 }, { "epoch": 0.2, "grad_norm": 0.7314958690611608, "learning_rate": 9.87201915450098e-06, "loss": 0.6098, "step": 1570 }, { "epoch": 0.2, "grad_norm": 0.7636374468141408, "learning_rate": 9.8717871453317e-06, "loss": 0.6245, "step": 1571 }, { "epoch": 0.2, "grad_norm": 0.8121591259993941, "learning_rate": 9.871554928786007e-06, "loss": 0.5783, "step": 1572 }, { "epoch": 0.2, "grad_norm": 0.9954410395816852, "learning_rate": 9.87132250487379e-06, "loss": 0.6484, "step": 1573 }, { "epoch": 0.2, "grad_norm": 0.7954625985334292, "learning_rate": 9.87108987360494e-06, "loss": 0.5603, "step": 1574 }, { "epoch": 0.2, "grad_norm": 0.9622292414595693, "learning_rate": 9.870857034989359e-06, "loss": 0.6343, "step": 1575 }, { "epoch": 0.2, "grad_norm": 0.6513245173343255, "learning_rate": 9.87062398903696e-06, "loss": 0.554, "step": 1576 }, { "epoch": 0.2, "grad_norm": 0.7017719471811728, "learning_rate": 9.87039073575766e-06, "loss": 0.5777, "step": 1577 }, { "epoch": 0.2, "grad_norm": 0.8207537650088288, "learning_rate": 9.870157275161394e-06, "loss": 0.5658, "step": 1578 }, { "epoch": 0.2, "grad_norm": 0.9306911535789684, "learning_rate": 9.869923607258093e-06, "loss": 0.5868, "step": 1579 }, { "epoch": 0.2, "grad_norm": 0.8082491365487466, "learning_rate": 9.869689732057706e-06, "loss": 0.7051, "step": 1580 }, { "epoch": 0.2, "grad_norm": 0.7968390618902453, "learning_rate": 9.869455649570188e-06, "loss": 0.6241, "step": 1581 }, { "epoch": 0.2, "grad_norm": 0.7852533651109809, "learning_rate": 9.869221359805506e-06, "loss": 0.6005, "step": 1582 }, { "epoch": 0.2, "grad_norm": 0.844980110728931, "learning_rate": 9.868986862773628e-06, "loss": 0.6541, "step": 1583 }, { "epoch": 0.2, "grad_norm": 0.890366904680627, "learning_rate": 9.86875215848454e-06, "loss": 0.6543, "step": 1584 }, { "epoch": 0.2, "grad_norm": 0.6280073463676458, "learning_rate": 9.868517246948228e-06, "loss": 0.5546, "step": 1585 }, { "epoch": 0.2, "grad_norm": 0.682946858818819, "learning_rate": 9.868282128174698e-06, "loss": 0.5619, "step": 1586 }, { "epoch": 0.2, "grad_norm": 0.6369580171058496, "learning_rate": 9.868046802173952e-06, "loss": 0.5563, "step": 1587 }, { "epoch": 0.2, "grad_norm": 0.6459659497595663, "learning_rate": 9.867811268956011e-06, "loss": 0.5679, "step": 1588 }, { "epoch": 0.2, "grad_norm": 0.9076663467022951, "learning_rate": 9.867575528530899e-06, "loss": 0.6413, "step": 1589 }, { "epoch": 0.2, "grad_norm": 0.7644414635666698, "learning_rate": 9.867339580908652e-06, "loss": 0.5695, "step": 1590 }, { "epoch": 0.2, "grad_norm": 0.9802070894770096, "learning_rate": 9.867103426099313e-06, "loss": 0.6452, "step": 1591 }, { "epoch": 0.2, "grad_norm": 0.9292906996900488, "learning_rate": 9.866867064112934e-06, "loss": 0.6407, "step": 1592 }, { "epoch": 0.2, "grad_norm": 0.6791333305841595, "learning_rate": 9.866630494959574e-06, "loss": 0.5767, "step": 1593 }, { "epoch": 0.2, "grad_norm": 0.6453810795437466, "learning_rate": 9.866393718649309e-06, "loss": 0.5665, "step": 1594 }, { "epoch": 0.2, "grad_norm": 0.8320577331876663, "learning_rate": 9.866156735192211e-06, "loss": 0.6371, "step": 1595 }, { "epoch": 0.2, "grad_norm": 0.7034813881171303, "learning_rate": 9.865919544598373e-06, "loss": 0.5716, "step": 1596 }, { "epoch": 0.2, "grad_norm": 0.7509585379589557, "learning_rate": 9.865682146877888e-06, "loss": 0.5962, "step": 1597 }, { "epoch": 0.2, "grad_norm": 0.9686834691796042, "learning_rate": 9.865444542040865e-06, "loss": 0.6133, "step": 1598 }, { "epoch": 0.2, "grad_norm": 0.8268502522136598, "learning_rate": 9.865206730097413e-06, "loss": 0.6333, "step": 1599 }, { "epoch": 0.2, "grad_norm": 1.1796734025912674, "learning_rate": 9.86496871105766e-06, "loss": 0.6021, "step": 1600 }, { "epoch": 0.2, "grad_norm": 0.5929343695552657, "learning_rate": 9.86473048493173e-06, "loss": 0.4875, "step": 1601 }, { "epoch": 0.2, "grad_norm": 0.8711210540344185, "learning_rate": 9.864492051729772e-06, "loss": 0.6851, "step": 1602 }, { "epoch": 0.2, "grad_norm": 0.9756325623444182, "learning_rate": 9.864253411461932e-06, "loss": 0.6592, "step": 1603 }, { "epoch": 0.2, "grad_norm": 0.8278114941762432, "learning_rate": 9.864014564138369e-06, "loss": 0.6452, "step": 1604 }, { "epoch": 0.2, "grad_norm": 0.8427554962800573, "learning_rate": 9.86377550976925e-06, "loss": 0.6274, "step": 1605 }, { "epoch": 0.2, "grad_norm": 0.780829481157279, "learning_rate": 9.863536248364748e-06, "loss": 0.6168, "step": 1606 }, { "epoch": 0.2, "grad_norm": 0.8066292955098436, "learning_rate": 9.86329677993505e-06, "loss": 0.5908, "step": 1607 }, { "epoch": 0.2, "grad_norm": 0.5680189296272247, "learning_rate": 9.863057104490348e-06, "loss": 0.5663, "step": 1608 }, { "epoch": 0.2, "grad_norm": 0.8443458599638527, "learning_rate": 9.862817222040845e-06, "loss": 0.6007, "step": 1609 }, { "epoch": 0.21, "grad_norm": 0.9003249014982616, "learning_rate": 9.862577132596755e-06, "loss": 0.6193, "step": 1610 }, { "epoch": 0.21, "grad_norm": 0.852654164152259, "learning_rate": 9.862336836168293e-06, "loss": 0.6172, "step": 1611 }, { "epoch": 0.21, "grad_norm": 0.5795560644283292, "learning_rate": 9.86209633276569e-06, "loss": 0.547, "step": 1612 }, { "epoch": 0.21, "grad_norm": 0.6376658582526519, "learning_rate": 9.861855622399184e-06, "loss": 0.5538, "step": 1613 }, { "epoch": 0.21, "grad_norm": 0.6823015338001743, "learning_rate": 9.86161470507902e-06, "loss": 0.5244, "step": 1614 }, { "epoch": 0.21, "grad_norm": 0.7020896797015818, "learning_rate": 9.861373580815452e-06, "loss": 0.6004, "step": 1615 }, { "epoch": 0.21, "grad_norm": 0.7067135748192178, "learning_rate": 9.861132249618745e-06, "loss": 0.5495, "step": 1616 }, { "epoch": 0.21, "grad_norm": 0.7840787137883893, "learning_rate": 9.860890711499175e-06, "loss": 0.6569, "step": 1617 }, { "epoch": 0.21, "grad_norm": 0.6508566226300785, "learning_rate": 9.860648966467018e-06, "loss": 0.5679, "step": 1618 }, { "epoch": 0.21, "grad_norm": 0.7997398853337984, "learning_rate": 9.860407014532568e-06, "loss": 0.63, "step": 1619 }, { "epoch": 0.21, "grad_norm": 0.6305162767609661, "learning_rate": 9.860164855706123e-06, "loss": 0.5793, "step": 1620 }, { "epoch": 0.21, "grad_norm": 0.6898611702327054, "learning_rate": 9.859922489997991e-06, "loss": 0.5842, "step": 1621 }, { "epoch": 0.21, "grad_norm": 0.6204437929878805, "learning_rate": 9.859679917418489e-06, "loss": 0.5621, "step": 1622 }, { "epoch": 0.21, "grad_norm": 0.8567548920555998, "learning_rate": 9.859437137977942e-06, "loss": 0.6206, "step": 1623 }, { "epoch": 0.21, "grad_norm": 0.6573511487962571, "learning_rate": 9.859194151686685e-06, "loss": 0.5791, "step": 1624 }, { "epoch": 0.21, "grad_norm": 0.7905473570590201, "learning_rate": 9.85895095855506e-06, "loss": 0.5781, "step": 1625 }, { "epoch": 0.21, "grad_norm": 0.9481362480472231, "learning_rate": 9.858707558593421e-06, "loss": 0.6792, "step": 1626 }, { "epoch": 0.21, "grad_norm": 0.838869534675686, "learning_rate": 9.858463951812125e-06, "loss": 0.6789, "step": 1627 }, { "epoch": 0.21, "grad_norm": 0.6271821937311737, "learning_rate": 9.858220138221546e-06, "loss": 0.531, "step": 1628 }, { "epoch": 0.21, "grad_norm": 0.6543623055698378, "learning_rate": 9.85797611783206e-06, "loss": 0.5008, "step": 1629 }, { "epoch": 0.21, "grad_norm": 0.7101443776889375, "learning_rate": 9.857731890654055e-06, "loss": 0.5595, "step": 1630 }, { "epoch": 0.21, "grad_norm": 0.933466387973217, "learning_rate": 9.857487456697925e-06, "loss": 0.576, "step": 1631 }, { "epoch": 0.21, "grad_norm": 1.408488406231972, "learning_rate": 9.857242815974078e-06, "loss": 0.6954, "step": 1632 }, { "epoch": 0.21, "grad_norm": 0.9322296346854334, "learning_rate": 9.856997968492924e-06, "loss": 0.6195, "step": 1633 }, { "epoch": 0.21, "grad_norm": 0.8527005341614082, "learning_rate": 9.856752914264889e-06, "loss": 0.6158, "step": 1634 }, { "epoch": 0.21, "grad_norm": 0.6029817931915498, "learning_rate": 9.8565076533004e-06, "loss": 0.5773, "step": 1635 }, { "epoch": 0.21, "grad_norm": 0.6922544203961906, "learning_rate": 9.856262185609901e-06, "loss": 0.5389, "step": 1636 }, { "epoch": 0.21, "grad_norm": 0.6657983220041698, "learning_rate": 9.856016511203839e-06, "loss": 0.5861, "step": 1637 }, { "epoch": 0.21, "grad_norm": 1.0928942883190693, "learning_rate": 9.855770630092672e-06, "loss": 0.6335, "step": 1638 }, { "epoch": 0.21, "grad_norm": 1.1189330925779744, "learning_rate": 9.855524542286866e-06, "loss": 0.606, "step": 1639 }, { "epoch": 0.21, "grad_norm": 0.8953629009058321, "learning_rate": 9.855278247796897e-06, "loss": 0.5854, "step": 1640 }, { "epoch": 0.21, "grad_norm": 0.6875087356327355, "learning_rate": 9.855031746633247e-06, "loss": 0.5434, "step": 1641 }, { "epoch": 0.21, "grad_norm": 1.1680271686982153, "learning_rate": 9.854785038806411e-06, "loss": 0.65, "step": 1642 }, { "epoch": 0.21, "grad_norm": 0.6489497901061975, "learning_rate": 9.854538124326889e-06, "loss": 0.6051, "step": 1643 }, { "epoch": 0.21, "grad_norm": 0.6195205120566304, "learning_rate": 9.854291003205192e-06, "loss": 0.5157, "step": 1644 }, { "epoch": 0.21, "grad_norm": 0.8719980440711714, "learning_rate": 9.854043675451838e-06, "loss": 0.6223, "step": 1645 }, { "epoch": 0.21, "grad_norm": 0.8184096554586712, "learning_rate": 9.853796141077357e-06, "loss": 0.6516, "step": 1646 }, { "epoch": 0.21, "grad_norm": 0.6511274776408341, "learning_rate": 9.853548400092285e-06, "loss": 0.5093, "step": 1647 }, { "epoch": 0.21, "grad_norm": 0.9905158951835199, "learning_rate": 9.853300452507167e-06, "loss": 0.6101, "step": 1648 }, { "epoch": 0.21, "grad_norm": 0.6572766198066207, "learning_rate": 9.853052298332559e-06, "loss": 0.5691, "step": 1649 }, { "epoch": 0.21, "grad_norm": 0.6855701865032515, "learning_rate": 9.85280393757902e-06, "loss": 0.541, "step": 1650 }, { "epoch": 0.21, "grad_norm": 0.7668807561188197, "learning_rate": 9.852555370257127e-06, "loss": 0.6382, "step": 1651 }, { "epoch": 0.21, "grad_norm": 0.6325259906279111, "learning_rate": 9.852306596377459e-06, "loss": 0.5566, "step": 1652 }, { "epoch": 0.21, "grad_norm": 0.9250753695471651, "learning_rate": 9.852057615950603e-06, "loss": 0.6393, "step": 1653 }, { "epoch": 0.21, "grad_norm": 0.9279146497246463, "learning_rate": 9.85180842898716e-06, "loss": 0.6717, "step": 1654 }, { "epoch": 0.21, "grad_norm": 0.7987768441302029, "learning_rate": 9.851559035497735e-06, "loss": 0.6624, "step": 1655 }, { "epoch": 0.21, "grad_norm": 0.5686685449129294, "learning_rate": 9.851309435492948e-06, "loss": 0.5228, "step": 1656 }, { "epoch": 0.21, "grad_norm": 0.7910938429868714, "learning_rate": 9.85105962898342e-06, "loss": 0.6313, "step": 1657 }, { "epoch": 0.21, "grad_norm": 0.7554652092727698, "learning_rate": 9.850809615979785e-06, "loss": 0.6802, "step": 1658 }, { "epoch": 0.21, "grad_norm": 0.9099323233798392, "learning_rate": 9.850559396492685e-06, "loss": 0.655, "step": 1659 }, { "epoch": 0.21, "grad_norm": 0.8393170889094593, "learning_rate": 9.850308970532772e-06, "loss": 0.6576, "step": 1660 }, { "epoch": 0.21, "grad_norm": 0.7841118211705639, "learning_rate": 9.850058338110705e-06, "loss": 0.5871, "step": 1661 }, { "epoch": 0.21, "grad_norm": 0.5549050394204721, "learning_rate": 9.849807499237154e-06, "loss": 0.5044, "step": 1662 }, { "epoch": 0.21, "grad_norm": 0.686561215272153, "learning_rate": 9.849556453922796e-06, "loss": 0.5421, "step": 1663 }, { "epoch": 0.21, "grad_norm": 0.5762718732077279, "learning_rate": 9.849305202178314e-06, "loss": 0.5529, "step": 1664 }, { "epoch": 0.21, "grad_norm": 0.758757336304438, "learning_rate": 9.849053744014408e-06, "loss": 0.6206, "step": 1665 }, { "epoch": 0.21, "grad_norm": 0.55247077233078, "learning_rate": 9.848802079441779e-06, "loss": 0.5167, "step": 1666 }, { "epoch": 0.21, "grad_norm": 0.8895213823591999, "learning_rate": 9.848550208471142e-06, "loss": 0.6165, "step": 1667 }, { "epoch": 0.21, "grad_norm": 0.855374858594082, "learning_rate": 9.848298131113214e-06, "loss": 0.6212, "step": 1668 }, { "epoch": 0.21, "grad_norm": 0.6102036648610574, "learning_rate": 9.848045847378726e-06, "loss": 0.5034, "step": 1669 }, { "epoch": 0.21, "grad_norm": 0.825259433810369, "learning_rate": 9.847793357278421e-06, "loss": 0.6835, "step": 1670 }, { "epoch": 0.21, "grad_norm": 0.6130078610734073, "learning_rate": 9.847540660823045e-06, "loss": 0.5656, "step": 1671 }, { "epoch": 0.21, "grad_norm": 0.7704459089579551, "learning_rate": 9.847287758023351e-06, "loss": 0.6029, "step": 1672 }, { "epoch": 0.21, "grad_norm": 0.6260214451166097, "learning_rate": 9.847034648890108e-06, "loss": 0.5277, "step": 1673 }, { "epoch": 0.21, "grad_norm": 0.8269750454306841, "learning_rate": 9.84678133343409e-06, "loss": 0.5882, "step": 1674 }, { "epoch": 0.21, "grad_norm": 0.7715515416702583, "learning_rate": 9.846527811666078e-06, "loss": 0.5867, "step": 1675 }, { "epoch": 0.21, "grad_norm": 0.6165145422690684, "learning_rate": 9.846274083596863e-06, "loss": 0.5475, "step": 1676 }, { "epoch": 0.21, "grad_norm": 0.6233397616203508, "learning_rate": 9.846020149237248e-06, "loss": 0.545, "step": 1677 }, { "epoch": 0.21, "grad_norm": 0.6156061510869181, "learning_rate": 9.845766008598042e-06, "loss": 0.5755, "step": 1678 }, { "epoch": 0.21, "grad_norm": 0.9484201167773875, "learning_rate": 9.845511661690059e-06, "loss": 0.6152, "step": 1679 }, { "epoch": 0.21, "grad_norm": 0.6501776492838833, "learning_rate": 9.84525710852413e-06, "loss": 0.5681, "step": 1680 }, { "epoch": 0.21, "grad_norm": 0.6109494161031894, "learning_rate": 9.84500234911109e-06, "loss": 0.5359, "step": 1681 }, { "epoch": 0.21, "grad_norm": 0.6539827487484311, "learning_rate": 9.84474738346178e-06, "loss": 0.5798, "step": 1682 }, { "epoch": 0.21, "grad_norm": 0.634957277775411, "learning_rate": 9.844492211587058e-06, "loss": 0.5734, "step": 1683 }, { "epoch": 0.21, "grad_norm": 0.6636185620154464, "learning_rate": 9.844236833497782e-06, "loss": 0.5195, "step": 1684 }, { "epoch": 0.21, "grad_norm": 1.2841736516006863, "learning_rate": 9.843981249204825e-06, "loss": 0.6146, "step": 1685 }, { "epoch": 0.21, "grad_norm": 0.7380799101894738, "learning_rate": 9.843725458719065e-06, "loss": 0.5537, "step": 1686 }, { "epoch": 0.21, "grad_norm": 0.6474222869333314, "learning_rate": 9.84346946205139e-06, "loss": 0.5762, "step": 1687 }, { "epoch": 0.22, "grad_norm": 0.6328728625853145, "learning_rate": 9.843213259212698e-06, "loss": 0.5034, "step": 1688 }, { "epoch": 0.22, "grad_norm": 0.9351332256119143, "learning_rate": 9.842956850213893e-06, "loss": 0.6256, "step": 1689 }, { "epoch": 0.22, "grad_norm": 0.6733164129517442, "learning_rate": 9.842700235065893e-06, "loss": 0.6195, "step": 1690 }, { "epoch": 0.22, "grad_norm": 0.5682114661543541, "learning_rate": 9.842443413779618e-06, "loss": 0.5294, "step": 1691 }, { "epoch": 0.22, "grad_norm": 0.6413366655094044, "learning_rate": 9.842186386366002e-06, "loss": 0.5897, "step": 1692 }, { "epoch": 0.22, "grad_norm": 0.63052655305629, "learning_rate": 9.841929152835983e-06, "loss": 0.5714, "step": 1693 }, { "epoch": 0.22, "grad_norm": 0.67460003471768, "learning_rate": 9.841671713200513e-06, "loss": 0.548, "step": 1694 }, { "epoch": 0.22, "grad_norm": 0.8176681406211385, "learning_rate": 9.841414067470552e-06, "loss": 0.6159, "step": 1695 }, { "epoch": 0.22, "grad_norm": 0.9175096301150686, "learning_rate": 9.841156215657063e-06, "loss": 0.6853, "step": 1696 }, { "epoch": 0.22, "grad_norm": 0.7801575625897028, "learning_rate": 9.840898157771024e-06, "loss": 0.5602, "step": 1697 }, { "epoch": 0.22, "grad_norm": 0.6269357761719352, "learning_rate": 9.84063989382342e-06, "loss": 0.5295, "step": 1698 }, { "epoch": 0.22, "grad_norm": 0.7248816894100609, "learning_rate": 9.840381423825245e-06, "loss": 0.6128, "step": 1699 }, { "epoch": 0.22, "grad_norm": 0.8856359618494074, "learning_rate": 9.840122747787499e-06, "loss": 0.6154, "step": 1700 }, { "epoch": 0.22, "grad_norm": 0.8144981201588504, "learning_rate": 9.839863865721197e-06, "loss": 0.6517, "step": 1701 }, { "epoch": 0.22, "grad_norm": 0.8074339626714965, "learning_rate": 9.839604777637355e-06, "loss": 0.6454, "step": 1702 }, { "epoch": 0.22, "grad_norm": 0.755912866391722, "learning_rate": 9.839345483547002e-06, "loss": 0.5942, "step": 1703 }, { "epoch": 0.22, "grad_norm": 0.5453412409044164, "learning_rate": 9.839085983461176e-06, "loss": 0.4867, "step": 1704 }, { "epoch": 0.22, "grad_norm": 1.1312225476673403, "learning_rate": 9.838826277390924e-06, "loss": 0.623, "step": 1705 }, { "epoch": 0.22, "grad_norm": 0.5580077144699175, "learning_rate": 9.8385663653473e-06, "loss": 0.5262, "step": 1706 }, { "epoch": 0.22, "grad_norm": 1.0276120472248955, "learning_rate": 9.838306247341368e-06, "loss": 0.6897, "step": 1707 }, { "epoch": 0.22, "grad_norm": 0.6345644554063219, "learning_rate": 9.838045923384198e-06, "loss": 0.5501, "step": 1708 }, { "epoch": 0.22, "grad_norm": 0.6950163591199844, "learning_rate": 9.837785393486875e-06, "loss": 0.5713, "step": 1709 }, { "epoch": 0.22, "grad_norm": 0.7247758833337077, "learning_rate": 9.837524657660486e-06, "loss": 0.6012, "step": 1710 }, { "epoch": 0.22, "grad_norm": 0.8438527231007332, "learning_rate": 9.837263715916132e-06, "loss": 0.6684, "step": 1711 }, { "epoch": 0.22, "grad_norm": 1.003086199282932, "learning_rate": 9.837002568264919e-06, "loss": 0.5959, "step": 1712 }, { "epoch": 0.22, "grad_norm": 0.9463278570698767, "learning_rate": 9.836741214717964e-06, "loss": 0.6049, "step": 1713 }, { "epoch": 0.22, "grad_norm": 0.6365549845329462, "learning_rate": 9.836479655286391e-06, "loss": 0.5541, "step": 1714 }, { "epoch": 0.22, "grad_norm": 0.8799062000288134, "learning_rate": 9.836217889981335e-06, "loss": 0.6678, "step": 1715 }, { "epoch": 0.22, "grad_norm": 0.6781532713920536, "learning_rate": 9.835955918813937e-06, "loss": 0.554, "step": 1716 }, { "epoch": 0.22, "grad_norm": 0.6090068756136409, "learning_rate": 9.835693741795348e-06, "loss": 0.5387, "step": 1717 }, { "epoch": 0.22, "grad_norm": 0.6727395605649175, "learning_rate": 9.83543135893673e-06, "loss": 0.5264, "step": 1718 }, { "epoch": 0.22, "grad_norm": 0.7648070527881605, "learning_rate": 9.83516877024925e-06, "loss": 0.6062, "step": 1719 }, { "epoch": 0.22, "grad_norm": 0.8421626479733515, "learning_rate": 9.83490597574409e-06, "loss": 0.5994, "step": 1720 }, { "epoch": 0.22, "grad_norm": 0.742540896777215, "learning_rate": 9.83464297543243e-06, "loss": 0.5347, "step": 1721 }, { "epoch": 0.22, "grad_norm": 0.8461047171388711, "learning_rate": 9.834379769325466e-06, "loss": 0.6343, "step": 1722 }, { "epoch": 0.22, "grad_norm": 0.6598525582260091, "learning_rate": 9.834116357434407e-06, "loss": 0.59, "step": 1723 }, { "epoch": 0.22, "grad_norm": 0.6924863493852103, "learning_rate": 9.833852739770463e-06, "loss": 0.5899, "step": 1724 }, { "epoch": 0.22, "grad_norm": 0.6556261440230574, "learning_rate": 9.833588916344851e-06, "loss": 0.5352, "step": 1725 }, { "epoch": 0.22, "grad_norm": 0.6164402957827337, "learning_rate": 9.833324887168807e-06, "loss": 0.5855, "step": 1726 }, { "epoch": 0.22, "grad_norm": 0.83313658454367, "learning_rate": 9.833060652253567e-06, "loss": 0.6342, "step": 1727 }, { "epoch": 0.22, "grad_norm": 0.7971140584665952, "learning_rate": 9.83279621161038e-06, "loss": 0.6277, "step": 1728 }, { "epoch": 0.22, "grad_norm": 0.7533113921604073, "learning_rate": 9.832531565250501e-06, "loss": 0.5854, "step": 1729 }, { "epoch": 0.22, "grad_norm": 0.5973559185082606, "learning_rate": 9.832266713185195e-06, "loss": 0.5474, "step": 1730 }, { "epoch": 0.22, "grad_norm": 0.6941911837247505, "learning_rate": 9.832001655425737e-06, "loss": 0.5791, "step": 1731 }, { "epoch": 0.22, "grad_norm": 0.7588200922371693, "learning_rate": 9.83173639198341e-06, "loss": 0.5659, "step": 1732 }, { "epoch": 0.22, "grad_norm": 0.659717998884332, "learning_rate": 9.831470922869506e-06, "loss": 0.5656, "step": 1733 }, { "epoch": 0.22, "grad_norm": 0.792657017702605, "learning_rate": 9.831205248095322e-06, "loss": 0.5704, "step": 1734 }, { "epoch": 0.22, "grad_norm": 0.8794596488407359, "learning_rate": 9.83093936767217e-06, "loss": 0.5796, "step": 1735 }, { "epoch": 0.22, "grad_norm": 0.6368550476640701, "learning_rate": 9.830673281611367e-06, "loss": 0.5795, "step": 1736 }, { "epoch": 0.22, "grad_norm": 0.8113624566405523, "learning_rate": 9.830406989924239e-06, "loss": 0.664, "step": 1737 }, { "epoch": 0.22, "grad_norm": 0.7148748320239768, "learning_rate": 9.83014049262212e-06, "loss": 0.5796, "step": 1738 }, { "epoch": 0.22, "grad_norm": 0.7045937711178463, "learning_rate": 9.829873789716355e-06, "loss": 0.5847, "step": 1739 }, { "epoch": 0.22, "grad_norm": 0.5964086549917872, "learning_rate": 9.829606881218297e-06, "loss": 0.5836, "step": 1740 }, { "epoch": 0.22, "grad_norm": 0.8052988769986834, "learning_rate": 9.829339767139308e-06, "loss": 0.5969, "step": 1741 }, { "epoch": 0.22, "grad_norm": 0.9221403141530807, "learning_rate": 9.82907244749076e-06, "loss": 0.7186, "step": 1742 }, { "epoch": 0.22, "grad_norm": 0.6203895913139273, "learning_rate": 9.828804922284026e-06, "loss": 0.5362, "step": 1743 }, { "epoch": 0.22, "grad_norm": 0.8738324041496631, "learning_rate": 9.828537191530496e-06, "loss": 0.614, "step": 1744 }, { "epoch": 0.22, "grad_norm": 0.7098802141428318, "learning_rate": 9.82826925524157e-06, "loss": 0.6196, "step": 1745 }, { "epoch": 0.22, "grad_norm": 0.6802253944524707, "learning_rate": 9.82800111342865e-06, "loss": 0.5573, "step": 1746 }, { "epoch": 0.22, "grad_norm": 0.8081654337036757, "learning_rate": 9.82773276610315e-06, "loss": 0.6616, "step": 1747 }, { "epoch": 0.22, "grad_norm": 0.7162665211569121, "learning_rate": 9.827464213276494e-06, "loss": 0.5565, "step": 1748 }, { "epoch": 0.22, "grad_norm": 1.2342725550537075, "learning_rate": 9.827195454960115e-06, "loss": 0.6428, "step": 1749 }, { "epoch": 0.22, "grad_norm": 0.8406360594694418, "learning_rate": 9.826926491165448e-06, "loss": 0.6361, "step": 1750 }, { "epoch": 0.22, "grad_norm": 0.7423596412830483, "learning_rate": 9.826657321903946e-06, "loss": 0.6112, "step": 1751 }, { "epoch": 0.22, "grad_norm": 0.6104102388515092, "learning_rate": 9.826387947187066e-06, "loss": 0.516, "step": 1752 }, { "epoch": 0.22, "grad_norm": 0.6846252071575041, "learning_rate": 9.826118367026274e-06, "loss": 0.5526, "step": 1753 }, { "epoch": 0.22, "grad_norm": 0.8553823260980193, "learning_rate": 9.825848581433044e-06, "loss": 0.704, "step": 1754 }, { "epoch": 0.22, "grad_norm": 0.6719827243923429, "learning_rate": 9.825578590418862e-06, "loss": 0.5003, "step": 1755 }, { "epoch": 0.22, "grad_norm": 0.7850911675406774, "learning_rate": 9.82530839399522e-06, "loss": 0.5521, "step": 1756 }, { "epoch": 0.22, "grad_norm": 0.7978646385458676, "learning_rate": 9.825037992173618e-06, "loss": 0.6446, "step": 1757 }, { "epoch": 0.22, "grad_norm": 0.8090192887152099, "learning_rate": 9.824767384965567e-06, "loss": 0.6247, "step": 1758 }, { "epoch": 0.22, "grad_norm": 0.8957108224116083, "learning_rate": 9.82449657238259e-06, "loss": 0.6446, "step": 1759 }, { "epoch": 0.22, "grad_norm": 0.8677758678006399, "learning_rate": 9.824225554436207e-06, "loss": 0.6161, "step": 1760 }, { "epoch": 0.22, "grad_norm": 0.8454027020819754, "learning_rate": 9.82395433113796e-06, "loss": 0.6067, "step": 1761 }, { "epoch": 0.22, "grad_norm": 0.7106692070370019, "learning_rate": 9.82368290249939e-06, "loss": 0.6342, "step": 1762 }, { "epoch": 0.22, "grad_norm": 0.6807978308051146, "learning_rate": 9.823411268532055e-06, "loss": 0.5495, "step": 1763 }, { "epoch": 0.22, "grad_norm": 0.6558366754497376, "learning_rate": 9.823139429247515e-06, "loss": 0.5589, "step": 1764 }, { "epoch": 0.22, "grad_norm": 0.7010054377950301, "learning_rate": 9.822867384657341e-06, "loss": 0.5249, "step": 1765 }, { "epoch": 0.22, "grad_norm": 0.6448064571642647, "learning_rate": 9.822595134773116e-06, "loss": 0.5513, "step": 1766 }, { "epoch": 0.23, "grad_norm": 1.0398190136124312, "learning_rate": 9.822322679606427e-06, "loss": 0.6578, "step": 1767 }, { "epoch": 0.23, "grad_norm": 0.6064319244857759, "learning_rate": 9.822050019168871e-06, "loss": 0.5589, "step": 1768 }, { "epoch": 0.23, "grad_norm": 0.7163008061551831, "learning_rate": 9.821777153472053e-06, "loss": 0.5687, "step": 1769 }, { "epoch": 0.23, "grad_norm": 0.6828781593595651, "learning_rate": 9.821504082527592e-06, "loss": 0.5684, "step": 1770 }, { "epoch": 0.23, "grad_norm": 0.7278664732198938, "learning_rate": 9.821230806347111e-06, "loss": 0.5327, "step": 1771 }, { "epoch": 0.23, "grad_norm": 0.7243893679333716, "learning_rate": 9.820957324942237e-06, "loss": 0.5791, "step": 1772 }, { "epoch": 0.23, "grad_norm": 0.764962784739967, "learning_rate": 9.820683638324618e-06, "loss": 0.5995, "step": 1773 }, { "epoch": 0.23, "grad_norm": 1.1084149493792808, "learning_rate": 9.820409746505901e-06, "loss": 0.6086, "step": 1774 }, { "epoch": 0.23, "grad_norm": 0.6267456203921593, "learning_rate": 9.820135649497744e-06, "loss": 0.5813, "step": 1775 }, { "epoch": 0.23, "grad_norm": 0.9700337915284244, "learning_rate": 9.819861347311815e-06, "loss": 0.6304, "step": 1776 }, { "epoch": 0.23, "grad_norm": 0.9323486483003622, "learning_rate": 9.819586839959792e-06, "loss": 0.6092, "step": 1777 }, { "epoch": 0.23, "grad_norm": 0.8293205415777264, "learning_rate": 9.819312127453358e-06, "loss": 0.6256, "step": 1778 }, { "epoch": 0.23, "grad_norm": 0.7672079298631141, "learning_rate": 9.819037209804205e-06, "loss": 0.6065, "step": 1779 }, { "epoch": 0.23, "grad_norm": 0.6515826109273735, "learning_rate": 9.818762087024039e-06, "loss": 0.5435, "step": 1780 }, { "epoch": 0.23, "grad_norm": 0.6496504007458078, "learning_rate": 9.81848675912457e-06, "loss": 0.5473, "step": 1781 }, { "epoch": 0.23, "grad_norm": 0.5649728998215756, "learning_rate": 9.818211226117517e-06, "loss": 0.5303, "step": 1782 }, { "epoch": 0.23, "grad_norm": 0.7576583466894525, "learning_rate": 9.81793548801461e-06, "loss": 0.6118, "step": 1783 }, { "epoch": 0.23, "grad_norm": 0.6930723487360514, "learning_rate": 9.817659544827582e-06, "loss": 0.547, "step": 1784 }, { "epoch": 0.23, "grad_norm": 0.8219685663116344, "learning_rate": 9.817383396568185e-06, "loss": 0.6618, "step": 1785 }, { "epoch": 0.23, "grad_norm": 0.7419186308064855, "learning_rate": 9.817107043248171e-06, "loss": 0.5586, "step": 1786 }, { "epoch": 0.23, "grad_norm": 0.6580774179682489, "learning_rate": 9.816830484879302e-06, "loss": 0.604, "step": 1787 }, { "epoch": 0.23, "grad_norm": 0.6058216236679842, "learning_rate": 9.816553721473352e-06, "loss": 0.566, "step": 1788 }, { "epoch": 0.23, "grad_norm": 0.7888254498849562, "learning_rate": 9.816276753042103e-06, "loss": 0.645, "step": 1789 }, { "epoch": 0.23, "grad_norm": 0.6141278230424722, "learning_rate": 9.815999579597342e-06, "loss": 0.5402, "step": 1790 }, { "epoch": 0.23, "grad_norm": 0.6808504418226013, "learning_rate": 9.815722201150869e-06, "loss": 0.5652, "step": 1791 }, { "epoch": 0.23, "grad_norm": 0.8140413787945604, "learning_rate": 9.815444617714489e-06, "loss": 0.6463, "step": 1792 }, { "epoch": 0.23, "grad_norm": 0.5419997332132857, "learning_rate": 9.815166829300022e-06, "loss": 0.4861, "step": 1793 }, { "epoch": 0.23, "grad_norm": 0.6864311376367265, "learning_rate": 9.81488883591929e-06, "loss": 0.5347, "step": 1794 }, { "epoch": 0.23, "grad_norm": 0.7049230119572016, "learning_rate": 9.814610637584125e-06, "loss": 0.5836, "step": 1795 }, { "epoch": 0.23, "grad_norm": 0.6704913119275552, "learning_rate": 9.81433223430637e-06, "loss": 0.5475, "step": 1796 }, { "epoch": 0.23, "grad_norm": 0.6143483032874669, "learning_rate": 9.814053626097879e-06, "loss": 0.5557, "step": 1797 }, { "epoch": 0.23, "grad_norm": 0.6345160268573378, "learning_rate": 9.813774812970506e-06, "loss": 0.5294, "step": 1798 }, { "epoch": 0.23, "grad_norm": 0.8201777018395332, "learning_rate": 9.813495794936123e-06, "loss": 0.6206, "step": 1799 }, { "epoch": 0.23, "grad_norm": 1.0710772418338836, "learning_rate": 9.813216572006606e-06, "loss": 0.664, "step": 1800 }, { "epoch": 0.23, "grad_norm": 0.7240729327012577, "learning_rate": 9.812937144193839e-06, "loss": 0.6213, "step": 1801 }, { "epoch": 0.23, "grad_norm": 0.5999673694543911, "learning_rate": 9.81265751150972e-06, "loss": 0.5311, "step": 1802 }, { "epoch": 0.23, "grad_norm": 0.9403646063554219, "learning_rate": 9.812377673966147e-06, "loss": 0.666, "step": 1803 }, { "epoch": 0.23, "grad_norm": 0.6040105186180103, "learning_rate": 9.812097631575037e-06, "loss": 0.5258, "step": 1804 }, { "epoch": 0.23, "grad_norm": 1.0974913112250428, "learning_rate": 9.811817384348306e-06, "loss": 0.6727, "step": 1805 }, { "epoch": 0.23, "grad_norm": 0.8672915396636229, "learning_rate": 9.811536932297887e-06, "loss": 0.5533, "step": 1806 }, { "epoch": 0.23, "grad_norm": 0.6340230627965114, "learning_rate": 9.811256275435716e-06, "loss": 0.5834, "step": 1807 }, { "epoch": 0.23, "grad_norm": 0.8718616880804044, "learning_rate": 9.81097541377374e-06, "loss": 0.6046, "step": 1808 }, { "epoch": 0.23, "grad_norm": 0.7195864741878601, "learning_rate": 9.810694347323913e-06, "loss": 0.5896, "step": 1809 }, { "epoch": 0.23, "grad_norm": 0.834375755837981, "learning_rate": 9.810413076098201e-06, "loss": 0.6209, "step": 1810 }, { "epoch": 0.23, "grad_norm": 1.0459165288147858, "learning_rate": 9.810131600108577e-06, "loss": 0.7057, "step": 1811 }, { "epoch": 0.23, "grad_norm": 0.9085264408735688, "learning_rate": 9.809849919367021e-06, "loss": 0.6524, "step": 1812 }, { "epoch": 0.23, "grad_norm": 0.6245803370399223, "learning_rate": 9.809568033885523e-06, "loss": 0.5362, "step": 1813 }, { "epoch": 0.23, "grad_norm": 0.62992976780188, "learning_rate": 9.809285943676086e-06, "loss": 0.5811, "step": 1814 }, { "epoch": 0.23, "grad_norm": 0.7588014992492863, "learning_rate": 9.809003648750712e-06, "loss": 0.6019, "step": 1815 }, { "epoch": 0.23, "grad_norm": 0.8454433772131614, "learning_rate": 9.808721149121421e-06, "loss": 0.6199, "step": 1816 }, { "epoch": 0.23, "grad_norm": 0.6311787764848752, "learning_rate": 9.808438444800238e-06, "loss": 0.5422, "step": 1817 }, { "epoch": 0.23, "grad_norm": 0.7647311379504511, "learning_rate": 9.808155535799196e-06, "loss": 0.6039, "step": 1818 }, { "epoch": 0.23, "grad_norm": 0.6859108418964911, "learning_rate": 9.807872422130336e-06, "loss": 0.5781, "step": 1819 }, { "epoch": 0.23, "grad_norm": 0.7272285110153806, "learning_rate": 9.80758910380571e-06, "loss": 0.5255, "step": 1820 }, { "epoch": 0.23, "grad_norm": 0.9282585204580677, "learning_rate": 9.80730558083738e-06, "loss": 0.6676, "step": 1821 }, { "epoch": 0.23, "grad_norm": 0.7771576518488775, "learning_rate": 9.807021853237415e-06, "loss": 0.6327, "step": 1822 }, { "epoch": 0.23, "grad_norm": 0.8991058364926697, "learning_rate": 9.806737921017889e-06, "loss": 0.5972, "step": 1823 }, { "epoch": 0.23, "grad_norm": 0.6311085357228988, "learning_rate": 9.80645378419089e-06, "loss": 0.5644, "step": 1824 }, { "epoch": 0.23, "grad_norm": 0.7044051154406225, "learning_rate": 9.806169442768512e-06, "loss": 0.6508, "step": 1825 }, { "epoch": 0.23, "grad_norm": 4.377727237075721, "learning_rate": 9.805884896762861e-06, "loss": 0.6146, "step": 1826 }, { "epoch": 0.23, "grad_norm": 0.6541557095893447, "learning_rate": 9.805600146186045e-06, "loss": 0.535, "step": 1827 }, { "epoch": 0.23, "grad_norm": 0.7937694622750345, "learning_rate": 9.805315191050189e-06, "loss": 0.6149, "step": 1828 }, { "epoch": 0.23, "grad_norm": 0.6882330971401761, "learning_rate": 9.805030031367421e-06, "loss": 0.5604, "step": 1829 }, { "epoch": 0.23, "grad_norm": 0.5556525189442758, "learning_rate": 9.804744667149878e-06, "loss": 0.4953, "step": 1830 }, { "epoch": 0.23, "grad_norm": 0.6249957602193592, "learning_rate": 9.80445909840971e-06, "loss": 0.5598, "step": 1831 }, { "epoch": 0.23, "grad_norm": 0.6452773557447997, "learning_rate": 9.80417332515907e-06, "loss": 0.5606, "step": 1832 }, { "epoch": 0.23, "grad_norm": 0.881344799149902, "learning_rate": 9.803887347410123e-06, "loss": 0.617, "step": 1833 }, { "epoch": 0.23, "grad_norm": 0.7228633029360589, "learning_rate": 9.803601165175043e-06, "loss": 0.5779, "step": 1834 }, { "epoch": 0.23, "grad_norm": 0.6328450543531903, "learning_rate": 9.803314778466011e-06, "loss": 0.5072, "step": 1835 }, { "epoch": 0.23, "grad_norm": 0.6878082965265847, "learning_rate": 9.803028187295218e-06, "loss": 0.5446, "step": 1836 }, { "epoch": 0.23, "grad_norm": 0.7561265841790251, "learning_rate": 9.802741391674864e-06, "loss": 0.6078, "step": 1837 }, { "epoch": 0.23, "grad_norm": 1.5269171983317034, "learning_rate": 9.802454391617158e-06, "loss": 0.6168, "step": 1838 }, { "epoch": 0.23, "grad_norm": 0.662341341988264, "learning_rate": 9.802167187134312e-06, "loss": 0.5304, "step": 1839 }, { "epoch": 0.23, "grad_norm": 0.6361927091599409, "learning_rate": 9.801879778238557e-06, "loss": 0.5468, "step": 1840 }, { "epoch": 0.23, "grad_norm": 0.8665692683559979, "learning_rate": 9.801592164942122e-06, "loss": 0.654, "step": 1841 }, { "epoch": 0.23, "grad_norm": 0.5794446386427597, "learning_rate": 9.801304347257255e-06, "loss": 0.514, "step": 1842 }, { "epoch": 0.23, "grad_norm": 0.565083604405036, "learning_rate": 9.801016325196201e-06, "loss": 0.4536, "step": 1843 }, { "epoch": 0.23, "grad_norm": 0.7119210720305069, "learning_rate": 9.800728098771227e-06, "loss": 0.5877, "step": 1844 }, { "epoch": 0.24, "grad_norm": 0.6911282038193561, "learning_rate": 9.800439667994597e-06, "loss": 0.5466, "step": 1845 }, { "epoch": 0.24, "grad_norm": 1.1006291115988809, "learning_rate": 9.800151032878593e-06, "loss": 0.6649, "step": 1846 }, { "epoch": 0.24, "grad_norm": 0.7941378228483236, "learning_rate": 9.799862193435496e-06, "loss": 0.6036, "step": 1847 }, { "epoch": 0.24, "grad_norm": 0.7007455534210305, "learning_rate": 9.799573149677604e-06, "loss": 0.5798, "step": 1848 }, { "epoch": 0.24, "grad_norm": 0.7955570218586099, "learning_rate": 9.79928390161722e-06, "loss": 0.6842, "step": 1849 }, { "epoch": 0.24, "grad_norm": 0.6485587476088552, "learning_rate": 9.798994449266657e-06, "loss": 0.5086, "step": 1850 }, { "epoch": 0.24, "grad_norm": 0.7990949334892158, "learning_rate": 9.798704792638235e-06, "loss": 0.6534, "step": 1851 }, { "epoch": 0.24, "grad_norm": 0.6409614614806528, "learning_rate": 9.798414931744286e-06, "loss": 0.5419, "step": 1852 }, { "epoch": 0.24, "grad_norm": 0.8167959177032953, "learning_rate": 9.798124866597148e-06, "loss": 0.6005, "step": 1853 }, { "epoch": 0.24, "grad_norm": 0.653313874850728, "learning_rate": 9.797834597209164e-06, "loss": 0.5686, "step": 1854 }, { "epoch": 0.24, "grad_norm": 0.7840320310347608, "learning_rate": 9.797544123592694e-06, "loss": 0.6048, "step": 1855 }, { "epoch": 0.24, "grad_norm": 0.6548722236822639, "learning_rate": 9.797253445760103e-06, "loss": 0.471, "step": 1856 }, { "epoch": 0.24, "grad_norm": 0.8512904379792536, "learning_rate": 9.796962563723761e-06, "loss": 0.6613, "step": 1857 }, { "epoch": 0.24, "grad_norm": 0.9699865123154551, "learning_rate": 9.796671477496052e-06, "loss": 0.7084, "step": 1858 }, { "epoch": 0.24, "grad_norm": 1.2359218981242521, "learning_rate": 9.796380187089365e-06, "loss": 0.7023, "step": 1859 }, { "epoch": 0.24, "grad_norm": 0.6176424523894019, "learning_rate": 9.7960886925161e-06, "loss": 0.5307, "step": 1860 }, { "epoch": 0.24, "grad_norm": 0.6457671000241276, "learning_rate": 9.795796993788669e-06, "loss": 0.5582, "step": 1861 }, { "epoch": 0.24, "grad_norm": 0.8170632193767384, "learning_rate": 9.795505090919483e-06, "loss": 0.6949, "step": 1862 }, { "epoch": 0.24, "grad_norm": 0.6247440359342894, "learning_rate": 9.79521298392097e-06, "loss": 0.5519, "step": 1863 }, { "epoch": 0.24, "grad_norm": 0.8991623154905343, "learning_rate": 9.794920672805563e-06, "loss": 0.6027, "step": 1864 }, { "epoch": 0.24, "grad_norm": 0.6371668104566895, "learning_rate": 9.794628157585705e-06, "loss": 0.5482, "step": 1865 }, { "epoch": 0.24, "grad_norm": 0.7367620987083294, "learning_rate": 9.794335438273847e-06, "loss": 0.6282, "step": 1866 }, { "epoch": 0.24, "grad_norm": 0.8788005777652736, "learning_rate": 9.794042514882453e-06, "loss": 0.6181, "step": 1867 }, { "epoch": 0.24, "grad_norm": 2.2160155982655554, "learning_rate": 9.793749387423986e-06, "loss": 0.6313, "step": 1868 }, { "epoch": 0.24, "grad_norm": 0.6875203175738123, "learning_rate": 9.793456055910929e-06, "loss": 0.5657, "step": 1869 }, { "epoch": 0.24, "grad_norm": 0.620390393773217, "learning_rate": 9.79316252035576e-06, "loss": 0.5636, "step": 1870 }, { "epoch": 0.24, "grad_norm": 0.8060927637625144, "learning_rate": 9.792868780770984e-06, "loss": 0.6076, "step": 1871 }, { "epoch": 0.24, "grad_norm": 0.7892093311624908, "learning_rate": 9.7925748371691e-06, "loss": 0.6325, "step": 1872 }, { "epoch": 0.24, "grad_norm": 0.6340907113812765, "learning_rate": 9.792280689562617e-06, "loss": 0.5467, "step": 1873 }, { "epoch": 0.24, "grad_norm": 0.7812304518114844, "learning_rate": 9.791986337964061e-06, "loss": 0.5487, "step": 1874 }, { "epoch": 0.24, "grad_norm": 0.6868687819751733, "learning_rate": 9.79169178238596e-06, "loss": 0.5666, "step": 1875 }, { "epoch": 0.24, "grad_norm": 0.7470990444460175, "learning_rate": 9.79139702284085e-06, "loss": 0.5775, "step": 1876 }, { "epoch": 0.24, "grad_norm": 0.669118934532083, "learning_rate": 9.791102059341282e-06, "loss": 0.5375, "step": 1877 }, { "epoch": 0.24, "grad_norm": 0.5789565813247094, "learning_rate": 9.790806891899807e-06, "loss": 0.5067, "step": 1878 }, { "epoch": 0.24, "grad_norm": 0.8735880122952258, "learning_rate": 9.790511520528994e-06, "loss": 0.655, "step": 1879 }, { "epoch": 0.24, "grad_norm": 0.7434939390969325, "learning_rate": 9.790215945241414e-06, "loss": 0.6341, "step": 1880 }, { "epoch": 0.24, "grad_norm": 0.6660027918812905, "learning_rate": 9.789920166049646e-06, "loss": 0.5673, "step": 1881 }, { "epoch": 0.24, "grad_norm": 0.8022913933066801, "learning_rate": 9.789624182966287e-06, "loss": 0.6094, "step": 1882 }, { "epoch": 0.24, "grad_norm": 0.6770294527721105, "learning_rate": 9.789327996003928e-06, "loss": 0.5774, "step": 1883 }, { "epoch": 0.24, "grad_norm": 0.94355827439604, "learning_rate": 9.789031605175183e-06, "loss": 0.6976, "step": 1884 }, { "epoch": 0.24, "grad_norm": 0.7256597176856311, "learning_rate": 9.788735010492665e-06, "loss": 0.5596, "step": 1885 }, { "epoch": 0.24, "grad_norm": 0.8615465433468593, "learning_rate": 9.788438211969e-06, "loss": 0.6355, "step": 1886 }, { "epoch": 0.24, "grad_norm": 0.8250046633773639, "learning_rate": 9.788141209616823e-06, "loss": 0.6403, "step": 1887 }, { "epoch": 0.24, "grad_norm": 0.7262155352396545, "learning_rate": 9.787844003448776e-06, "loss": 0.6173, "step": 1888 }, { "epoch": 0.24, "grad_norm": 0.6120776219419543, "learning_rate": 9.787546593477509e-06, "loss": 0.555, "step": 1889 }, { "epoch": 0.24, "grad_norm": 0.5843370001787598, "learning_rate": 9.78724897971568e-06, "loss": 0.5397, "step": 1890 }, { "epoch": 0.24, "grad_norm": 0.7438143537722247, "learning_rate": 9.786951162175961e-06, "loss": 0.6256, "step": 1891 }, { "epoch": 0.24, "grad_norm": 0.6842285821982648, "learning_rate": 9.786653140871029e-06, "loss": 0.5712, "step": 1892 }, { "epoch": 0.24, "grad_norm": 1.080874953127134, "learning_rate": 9.78635491581357e-06, "loss": 0.6835, "step": 1893 }, { "epoch": 0.24, "grad_norm": 0.7851095478476103, "learning_rate": 9.786056487016276e-06, "loss": 0.5984, "step": 1894 }, { "epoch": 0.24, "grad_norm": 0.9732872864863693, "learning_rate": 9.785757854491851e-06, "loss": 0.6524, "step": 1895 }, { "epoch": 0.24, "grad_norm": 0.6817606329799621, "learning_rate": 9.785459018253007e-06, "loss": 0.5323, "step": 1896 }, { "epoch": 0.24, "grad_norm": 0.5777681519472451, "learning_rate": 9.785159978312465e-06, "loss": 0.5827, "step": 1897 }, { "epoch": 0.24, "grad_norm": 0.9055026562937675, "learning_rate": 9.784860734682954e-06, "loss": 0.7013, "step": 1898 }, { "epoch": 0.24, "grad_norm": 0.6536385790471594, "learning_rate": 9.78456128737721e-06, "loss": 0.5104, "step": 1899 }, { "epoch": 0.24, "grad_norm": 0.6870498844950168, "learning_rate": 9.784261636407983e-06, "loss": 0.5769, "step": 1900 }, { "epoch": 0.24, "grad_norm": 0.564708476266616, "learning_rate": 9.783961781788027e-06, "loss": 0.4899, "step": 1901 }, { "epoch": 0.24, "grad_norm": 0.6478259646680682, "learning_rate": 9.783661723530105e-06, "loss": 0.5143, "step": 1902 }, { "epoch": 0.24, "grad_norm": 0.5663585499776949, "learning_rate": 9.78336146164699e-06, "loss": 0.5137, "step": 1903 }, { "epoch": 0.24, "grad_norm": 0.5878041448139183, "learning_rate": 9.783060996151461e-06, "loss": 0.5116, "step": 1904 }, { "epoch": 0.24, "grad_norm": 0.7619117929481182, "learning_rate": 9.78276032705631e-06, "loss": 0.6297, "step": 1905 }, { "epoch": 0.24, "grad_norm": 0.8115502761321896, "learning_rate": 9.782459454374337e-06, "loss": 0.5855, "step": 1906 }, { "epoch": 0.24, "grad_norm": 0.7862715666596223, "learning_rate": 9.78215837811835e-06, "loss": 0.6286, "step": 1907 }, { "epoch": 0.24, "grad_norm": 0.681053413459562, "learning_rate": 9.781857098301158e-06, "loss": 0.5786, "step": 1908 }, { "epoch": 0.24, "grad_norm": 0.5921885004324525, "learning_rate": 9.781555614935591e-06, "loss": 0.5684, "step": 1909 }, { "epoch": 0.24, "grad_norm": 1.001601564863681, "learning_rate": 9.781253928034483e-06, "loss": 0.6666, "step": 1910 }, { "epoch": 0.24, "grad_norm": 0.5639342154583687, "learning_rate": 9.780952037610674e-06, "loss": 0.4926, "step": 1911 }, { "epoch": 0.24, "grad_norm": 0.948207088170478, "learning_rate": 9.780649943677013e-06, "loss": 0.6486, "step": 1912 }, { "epoch": 0.24, "grad_norm": 0.731045565932882, "learning_rate": 9.780347646246359e-06, "loss": 0.6269, "step": 1913 }, { "epoch": 0.24, "grad_norm": 0.7373371843437875, "learning_rate": 9.780045145331585e-06, "loss": 0.5573, "step": 1914 }, { "epoch": 0.24, "grad_norm": 0.8560864430134707, "learning_rate": 9.779742440945563e-06, "loss": 0.64, "step": 1915 }, { "epoch": 0.24, "grad_norm": 1.3035260561724835, "learning_rate": 9.779439533101178e-06, "loss": 0.6405, "step": 1916 }, { "epoch": 0.24, "grad_norm": 0.8829823219148513, "learning_rate": 9.779136421811326e-06, "loss": 0.6577, "step": 1917 }, { "epoch": 0.24, "grad_norm": 0.9189240436124142, "learning_rate": 9.778833107088907e-06, "loss": 0.6538, "step": 1918 }, { "epoch": 0.24, "grad_norm": 0.8070312100696989, "learning_rate": 9.778529588946835e-06, "loss": 0.6585, "step": 1919 }, { "epoch": 0.24, "grad_norm": 0.6162431523006736, "learning_rate": 9.778225867398027e-06, "loss": 0.5574, "step": 1920 }, { "epoch": 0.24, "grad_norm": 0.7038298018322586, "learning_rate": 9.777921942455414e-06, "loss": 0.5793, "step": 1921 }, { "epoch": 0.24, "grad_norm": 0.8130947236153864, "learning_rate": 9.777617814131934e-06, "loss": 0.6469, "step": 1922 }, { "epoch": 0.24, "grad_norm": 0.6830042517603784, "learning_rate": 9.777313482440528e-06, "loss": 0.5905, "step": 1923 }, { "epoch": 0.25, "grad_norm": 0.7720600364389372, "learning_rate": 9.777008947394152e-06, "loss": 0.625, "step": 1924 }, { "epoch": 0.25, "grad_norm": 0.7757763888398167, "learning_rate": 9.776704209005774e-06, "loss": 0.6185, "step": 1925 }, { "epoch": 0.25, "grad_norm": 0.7150199268120511, "learning_rate": 9.77639926728836e-06, "loss": 0.5528, "step": 1926 }, { "epoch": 0.25, "grad_norm": 0.6747235250631582, "learning_rate": 9.776094122254892e-06, "loss": 0.6118, "step": 1927 }, { "epoch": 0.25, "grad_norm": 0.5501730011096506, "learning_rate": 9.77578877391836e-06, "loss": 0.5672, "step": 1928 }, { "epoch": 0.25, "grad_norm": 0.6747575054042125, "learning_rate": 9.775483222291762e-06, "loss": 0.579, "step": 1929 }, { "epoch": 0.25, "grad_norm": 0.6437417447268526, "learning_rate": 9.775177467388101e-06, "loss": 0.5662, "step": 1930 }, { "epoch": 0.25, "grad_norm": 0.5818262316647661, "learning_rate": 9.774871509220396e-06, "loss": 0.5539, "step": 1931 }, { "epoch": 0.25, "grad_norm": 0.6384674367901276, "learning_rate": 9.77456534780167e-06, "loss": 0.4779, "step": 1932 }, { "epoch": 0.25, "grad_norm": 0.6626283438878735, "learning_rate": 9.774258983144952e-06, "loss": 0.5772, "step": 1933 }, { "epoch": 0.25, "grad_norm": 1.0488726515835534, "learning_rate": 9.773952415263288e-06, "loss": 0.6297, "step": 1934 }, { "epoch": 0.25, "grad_norm": 0.6240739938305208, "learning_rate": 9.773645644169724e-06, "loss": 0.5175, "step": 1935 }, { "epoch": 0.25, "grad_norm": 0.8390389841799185, "learning_rate": 9.77333866987732e-06, "loss": 0.6576, "step": 1936 }, { "epoch": 0.25, "grad_norm": 0.8315159132638323, "learning_rate": 9.77303149239914e-06, "loss": 0.5954, "step": 1937 }, { "epoch": 0.25, "grad_norm": 0.6388266061400573, "learning_rate": 9.772724111748265e-06, "loss": 0.5862, "step": 1938 }, { "epoch": 0.25, "grad_norm": 1.1806161302822178, "learning_rate": 9.772416527937774e-06, "loss": 0.6431, "step": 1939 }, { "epoch": 0.25, "grad_norm": 0.7005717575610654, "learning_rate": 9.772108740980764e-06, "loss": 0.5804, "step": 1940 }, { "epoch": 0.25, "grad_norm": 0.5913186514354882, "learning_rate": 9.77180075089033e-06, "loss": 0.5499, "step": 1941 }, { "epoch": 0.25, "grad_norm": 0.6922462934160454, "learning_rate": 9.77149255767959e-06, "loss": 0.5241, "step": 1942 }, { "epoch": 0.25, "grad_norm": 0.77347767174598, "learning_rate": 9.77118416136166e-06, "loss": 0.6407, "step": 1943 }, { "epoch": 0.25, "grad_norm": 0.8038559729479153, "learning_rate": 9.770875561949665e-06, "loss": 0.6179, "step": 1944 }, { "epoch": 0.25, "grad_norm": 0.6279371308770109, "learning_rate": 9.770566759456743e-06, "loss": 0.5585, "step": 1945 }, { "epoch": 0.25, "grad_norm": 0.7289729212128583, "learning_rate": 9.770257753896038e-06, "loss": 0.5612, "step": 1946 }, { "epoch": 0.25, "grad_norm": 0.615149968987624, "learning_rate": 9.769948545280705e-06, "loss": 0.5887, "step": 1947 }, { "epoch": 0.25, "grad_norm": 0.6851751966271976, "learning_rate": 9.769639133623905e-06, "loss": 0.5275, "step": 1948 }, { "epoch": 0.25, "grad_norm": 0.5536769005121086, "learning_rate": 9.769329518938808e-06, "loss": 0.5175, "step": 1949 }, { "epoch": 0.25, "grad_norm": 0.6095844296252171, "learning_rate": 9.769019701238595e-06, "loss": 0.508, "step": 1950 }, { "epoch": 0.25, "grad_norm": 0.6736914616040667, "learning_rate": 9.768709680536453e-06, "loss": 0.5366, "step": 1951 }, { "epoch": 0.25, "grad_norm": 0.637199326869723, "learning_rate": 9.768399456845577e-06, "loss": 0.5953, "step": 1952 }, { "epoch": 0.25, "grad_norm": 0.8049306836349139, "learning_rate": 9.768089030179175e-06, "loss": 0.583, "step": 1953 }, { "epoch": 0.25, "grad_norm": 0.6303732958697623, "learning_rate": 9.767778400550459e-06, "loss": 0.5499, "step": 1954 }, { "epoch": 0.25, "grad_norm": 0.6099271068664792, "learning_rate": 9.767467567972653e-06, "loss": 0.59, "step": 1955 }, { "epoch": 0.25, "grad_norm": 0.7435821689088031, "learning_rate": 9.767156532458985e-06, "loss": 0.6661, "step": 1956 }, { "epoch": 0.25, "grad_norm": 0.7438129889723072, "learning_rate": 9.7668452940227e-06, "loss": 0.6034, "step": 1957 }, { "epoch": 0.25, "grad_norm": 1.112723556132667, "learning_rate": 9.766533852677042e-06, "loss": 0.5991, "step": 1958 }, { "epoch": 0.25, "grad_norm": 0.6311477087148216, "learning_rate": 9.76622220843527e-06, "loss": 0.5811, "step": 1959 }, { "epoch": 0.25, "grad_norm": 0.6599377713956334, "learning_rate": 9.765910361310648e-06, "loss": 0.5685, "step": 1960 }, { "epoch": 0.25, "grad_norm": 0.6476688505922261, "learning_rate": 9.765598311316453e-06, "loss": 0.5605, "step": 1961 }, { "epoch": 0.25, "grad_norm": 1.0305340753836008, "learning_rate": 9.765286058465966e-06, "loss": 0.6607, "step": 1962 }, { "epoch": 0.25, "grad_norm": 1.044994744196121, "learning_rate": 9.764973602772479e-06, "loss": 0.6493, "step": 1963 }, { "epoch": 0.25, "grad_norm": 0.6759009554840261, "learning_rate": 9.764660944249294e-06, "loss": 0.5395, "step": 1964 }, { "epoch": 0.25, "grad_norm": 0.6588819199833671, "learning_rate": 9.764348082909716e-06, "loss": 0.5729, "step": 1965 }, { "epoch": 0.25, "grad_norm": 1.5676185798033166, "learning_rate": 9.764035018767066e-06, "loss": 0.6366, "step": 1966 }, { "epoch": 0.25, "grad_norm": 0.7488411094021024, "learning_rate": 9.763721751834669e-06, "loss": 0.6195, "step": 1967 }, { "epoch": 0.25, "grad_norm": 0.7798042275961441, "learning_rate": 9.763408282125859e-06, "loss": 0.6319, "step": 1968 }, { "epoch": 0.25, "grad_norm": 0.6113906584451468, "learning_rate": 9.76309460965398e-06, "loss": 0.5173, "step": 1969 }, { "epoch": 0.25, "grad_norm": 0.9364621296730903, "learning_rate": 9.762780734432385e-06, "loss": 0.6356, "step": 1970 }, { "epoch": 0.25, "grad_norm": 0.5893345078880406, "learning_rate": 9.762466656474434e-06, "loss": 0.5172, "step": 1971 }, { "epoch": 0.25, "grad_norm": 0.9155751022027765, "learning_rate": 9.762152375793494e-06, "loss": 0.5782, "step": 1972 }, { "epoch": 0.25, "grad_norm": 0.5915185565742986, "learning_rate": 9.761837892402947e-06, "loss": 0.4789, "step": 1973 }, { "epoch": 0.25, "grad_norm": 0.7598180720449295, "learning_rate": 9.761523206316178e-06, "loss": 0.6276, "step": 1974 }, { "epoch": 0.25, "grad_norm": 0.6946721467354218, "learning_rate": 9.76120831754658e-06, "loss": 0.5663, "step": 1975 }, { "epoch": 0.25, "grad_norm": 0.7861411569330273, "learning_rate": 9.76089322610756e-06, "loss": 0.6307, "step": 1976 }, { "epoch": 0.25, "grad_norm": 0.7665265029916419, "learning_rate": 9.760577932012529e-06, "loss": 0.6186, "step": 1977 }, { "epoch": 0.25, "grad_norm": 0.6485763026996052, "learning_rate": 9.760262435274907e-06, "loss": 0.5554, "step": 1978 }, { "epoch": 0.25, "grad_norm": 1.0154239397820972, "learning_rate": 9.759946735908125e-06, "loss": 0.6091, "step": 1979 }, { "epoch": 0.25, "grad_norm": 0.6333491185463955, "learning_rate": 9.759630833925622e-06, "loss": 0.5214, "step": 1980 }, { "epoch": 0.25, "grad_norm": 0.8024634694331682, "learning_rate": 9.759314729340843e-06, "loss": 0.5984, "step": 1981 }, { "epoch": 0.25, "grad_norm": 0.5994701254065763, "learning_rate": 9.758998422167245e-06, "loss": 0.5424, "step": 1982 }, { "epoch": 0.25, "grad_norm": 0.8043182819282072, "learning_rate": 9.758681912418292e-06, "loss": 0.6087, "step": 1983 }, { "epoch": 0.25, "grad_norm": 0.6638797433462842, "learning_rate": 9.758365200107455e-06, "loss": 0.5653, "step": 1984 }, { "epoch": 0.25, "grad_norm": 0.8512709591686907, "learning_rate": 9.75804828524822e-06, "loss": 0.6407, "step": 1985 }, { "epoch": 0.25, "grad_norm": 0.7922522876482098, "learning_rate": 9.757731167854072e-06, "loss": 0.5331, "step": 1986 }, { "epoch": 0.25, "grad_norm": 0.6068064293105282, "learning_rate": 9.757413847938512e-06, "loss": 0.5123, "step": 1987 }, { "epoch": 0.25, "grad_norm": 0.8246795405287417, "learning_rate": 9.757096325515047e-06, "loss": 0.6201, "step": 1988 }, { "epoch": 0.25, "grad_norm": 0.9882681574025354, "learning_rate": 9.756778600597193e-06, "loss": 0.6364, "step": 1989 }, { "epoch": 0.25, "grad_norm": 1.0554044969170837, "learning_rate": 9.756460673198474e-06, "loss": 0.587, "step": 1990 }, { "epoch": 0.25, "grad_norm": 0.8535287115138298, "learning_rate": 9.756142543332424e-06, "loss": 0.6933, "step": 1991 }, { "epoch": 0.25, "grad_norm": 0.7035383973149673, "learning_rate": 9.755824211012585e-06, "loss": 0.5658, "step": 1992 }, { "epoch": 0.25, "grad_norm": 0.6204193016440039, "learning_rate": 9.755505676252506e-06, "loss": 0.497, "step": 1993 }, { "epoch": 0.25, "grad_norm": 0.757533191159159, "learning_rate": 9.755186939065746e-06, "loss": 0.6107, "step": 1994 }, { "epoch": 0.25, "grad_norm": 0.6809709597015329, "learning_rate": 9.754867999465876e-06, "loss": 0.5836, "step": 1995 }, { "epoch": 0.25, "grad_norm": 0.6514669512667115, "learning_rate": 9.754548857466468e-06, "loss": 0.549, "step": 1996 }, { "epoch": 0.25, "grad_norm": 0.5753757412548579, "learning_rate": 9.754229513081109e-06, "loss": 0.5024, "step": 1997 }, { "epoch": 0.25, "grad_norm": 0.898139757103797, "learning_rate": 9.753909966323389e-06, "loss": 0.6627, "step": 1998 }, { "epoch": 0.25, "grad_norm": 0.7743361101681899, "learning_rate": 9.753590217206917e-06, "loss": 0.5513, "step": 1999 }, { "epoch": 0.25, "grad_norm": 0.8926775684335282, "learning_rate": 9.753270265745298e-06, "loss": 0.6801, "step": 2000 }, { "epoch": 0.25, "grad_norm": 0.905779177290222, "learning_rate": 9.752950111952153e-06, "loss": 0.6537, "step": 2001 }, { "epoch": 0.26, "grad_norm": 0.587992147442533, "learning_rate": 9.75262975584111e-06, "loss": 0.4905, "step": 2002 }, { "epoch": 0.26, "grad_norm": 0.6776931912155311, "learning_rate": 9.752309197425807e-06, "loss": 0.5584, "step": 2003 }, { "epoch": 0.26, "grad_norm": 0.9758447500493901, "learning_rate": 9.751988436719886e-06, "loss": 0.6163, "step": 2004 }, { "epoch": 0.26, "grad_norm": 0.786220018553936, "learning_rate": 9.751667473737003e-06, "loss": 0.6614, "step": 2005 }, { "epoch": 0.26, "grad_norm": 0.8054724299259647, "learning_rate": 9.751346308490819e-06, "loss": 0.6208, "step": 2006 }, { "epoch": 0.26, "grad_norm": 0.6953486716148269, "learning_rate": 9.751024940995008e-06, "loss": 0.5441, "step": 2007 }, { "epoch": 0.26, "grad_norm": 0.7116303093210296, "learning_rate": 9.750703371263246e-06, "loss": 0.5668, "step": 2008 }, { "epoch": 0.26, "grad_norm": 0.6128615339599108, "learning_rate": 9.750381599309223e-06, "loss": 0.5103, "step": 2009 }, { "epoch": 0.26, "grad_norm": 0.5895524545957406, "learning_rate": 9.750059625146634e-06, "loss": 0.5451, "step": 2010 }, { "epoch": 0.26, "grad_norm": 0.7106416463269892, "learning_rate": 9.749737448789188e-06, "loss": 0.5748, "step": 2011 }, { "epoch": 0.26, "grad_norm": 0.9539169606527462, "learning_rate": 9.749415070250595e-06, "loss": 0.616, "step": 2012 }, { "epoch": 0.26, "grad_norm": 0.8121783756099556, "learning_rate": 9.749092489544578e-06, "loss": 0.661, "step": 2013 }, { "epoch": 0.26, "grad_norm": 0.5523981654354464, "learning_rate": 9.748769706684872e-06, "loss": 0.4868, "step": 2014 }, { "epoch": 0.26, "grad_norm": 0.9279060732475999, "learning_rate": 9.748446721685214e-06, "loss": 0.67, "step": 2015 }, { "epoch": 0.26, "grad_norm": 0.6502826087472836, "learning_rate": 9.748123534559353e-06, "loss": 0.5581, "step": 2016 }, { "epoch": 0.26, "grad_norm": 0.9561047719262271, "learning_rate": 9.747800145321047e-06, "loss": 0.6016, "step": 2017 }, { "epoch": 0.26, "grad_norm": 0.6914834687803977, "learning_rate": 9.74747655398406e-06, "loss": 0.5959, "step": 2018 }, { "epoch": 0.26, "grad_norm": 0.9461309182534641, "learning_rate": 9.747152760562165e-06, "loss": 0.6504, "step": 2019 }, { "epoch": 0.26, "grad_norm": 0.6677868296472617, "learning_rate": 9.746828765069148e-06, "loss": 0.5674, "step": 2020 }, { "epoch": 0.26, "grad_norm": 0.7205211983597987, "learning_rate": 9.7465045675188e-06, "loss": 0.5707, "step": 2021 }, { "epoch": 0.26, "grad_norm": 0.8565289265491989, "learning_rate": 9.746180167924919e-06, "loss": 0.5994, "step": 2022 }, { "epoch": 0.26, "grad_norm": 0.7827491188198623, "learning_rate": 9.745855566301315e-06, "loss": 0.6366, "step": 2023 }, { "epoch": 0.26, "grad_norm": 0.8513641718980255, "learning_rate": 9.745530762661805e-06, "loss": 0.6954, "step": 2024 }, { "epoch": 0.26, "grad_norm": 0.9325634003810489, "learning_rate": 9.745205757020216e-06, "loss": 0.6333, "step": 2025 }, { "epoch": 0.26, "grad_norm": 0.6288848026923852, "learning_rate": 9.744880549390382e-06, "loss": 0.5053, "step": 2026 }, { "epoch": 0.26, "grad_norm": 0.7933685981261801, "learning_rate": 9.744555139786142e-06, "loss": 0.5391, "step": 2027 }, { "epoch": 0.26, "grad_norm": 1.8354595558937188, "learning_rate": 9.744229528221354e-06, "loss": 0.572, "step": 2028 }, { "epoch": 0.26, "grad_norm": 0.6365256178368215, "learning_rate": 9.743903714709875e-06, "loss": 0.5329, "step": 2029 }, { "epoch": 0.26, "grad_norm": 1.0296411852335627, "learning_rate": 9.743577699265574e-06, "loss": 0.694, "step": 2030 }, { "epoch": 0.26, "grad_norm": 0.5709720494840266, "learning_rate": 9.743251481902329e-06, "loss": 0.5204, "step": 2031 }, { "epoch": 0.26, "grad_norm": 0.7712303990324043, "learning_rate": 9.742925062634025e-06, "loss": 0.5535, "step": 2032 }, { "epoch": 0.26, "grad_norm": 1.036540970751146, "learning_rate": 9.742598441474558e-06, "loss": 0.6278, "step": 2033 }, { "epoch": 0.26, "grad_norm": 0.7719481581583385, "learning_rate": 9.74227161843783e-06, "loss": 0.6635, "step": 2034 }, { "epoch": 0.26, "grad_norm": 0.7193126175204697, "learning_rate": 9.741944593537754e-06, "loss": 0.5871, "step": 2035 }, { "epoch": 0.26, "grad_norm": 0.5914133858909755, "learning_rate": 9.74161736678825e-06, "loss": 0.5482, "step": 2036 }, { "epoch": 0.26, "grad_norm": 0.8380293249144736, "learning_rate": 9.741289938203246e-06, "loss": 0.5469, "step": 2037 }, { "epoch": 0.26, "grad_norm": 0.717292077984541, "learning_rate": 9.74096230779668e-06, "loss": 0.5984, "step": 2038 }, { "epoch": 0.26, "grad_norm": 0.8120503871855917, "learning_rate": 9.740634475582499e-06, "loss": 0.6502, "step": 2039 }, { "epoch": 0.26, "grad_norm": 0.6498869588587249, "learning_rate": 9.740306441574656e-06, "loss": 0.5655, "step": 2040 }, { "epoch": 0.26, "grad_norm": 0.650287790228828, "learning_rate": 9.739978205787117e-06, "loss": 0.5129, "step": 2041 }, { "epoch": 0.26, "grad_norm": 1.4853959479523764, "learning_rate": 9.739649768233853e-06, "loss": 0.5819, "step": 2042 }, { "epoch": 0.26, "grad_norm": 0.6446973900005055, "learning_rate": 9.739321128928844e-06, "loss": 0.5536, "step": 2043 }, { "epoch": 0.26, "grad_norm": 0.6946298845745377, "learning_rate": 9.738992287886078e-06, "loss": 0.5143, "step": 2044 }, { "epoch": 0.26, "grad_norm": 0.5753733402622996, "learning_rate": 9.738663245119555e-06, "loss": 0.5252, "step": 2045 }, { "epoch": 0.26, "grad_norm": 0.7782048941045351, "learning_rate": 9.738334000643278e-06, "loss": 0.5927, "step": 2046 }, { "epoch": 0.26, "grad_norm": 0.9996804655740105, "learning_rate": 9.738004554471267e-06, "loss": 0.6152, "step": 2047 }, { "epoch": 0.26, "grad_norm": 0.8691303856697805, "learning_rate": 9.737674906617543e-06, "loss": 0.6083, "step": 2048 }, { "epoch": 0.26, "grad_norm": 0.6258160618603322, "learning_rate": 9.737345057096135e-06, "loss": 0.5168, "step": 2049 }, { "epoch": 0.26, "grad_norm": 0.775293773299195, "learning_rate": 9.737015005921088e-06, "loss": 0.6381, "step": 2050 }, { "epoch": 0.26, "grad_norm": 0.6385407024821724, "learning_rate": 9.73668475310645e-06, "loss": 0.5543, "step": 2051 }, { "epoch": 0.26, "grad_norm": 0.5861410800895339, "learning_rate": 9.736354298666277e-06, "loss": 0.4962, "step": 2052 }, { "epoch": 0.26, "grad_norm": 0.9199703456080394, "learning_rate": 9.736023642614638e-06, "loss": 0.6674, "step": 2053 }, { "epoch": 0.26, "grad_norm": 0.9154313901885083, "learning_rate": 9.735692784965606e-06, "loss": 0.6767, "step": 2054 }, { "epoch": 0.26, "grad_norm": 1.1384153897500089, "learning_rate": 9.735361725733265e-06, "loss": 0.6438, "step": 2055 }, { "epoch": 0.26, "grad_norm": 0.6542157276954875, "learning_rate": 9.735030464931707e-06, "loss": 0.5181, "step": 2056 }, { "epoch": 0.26, "grad_norm": 0.7714882273538483, "learning_rate": 9.734699002575035e-06, "loss": 0.6218, "step": 2057 }, { "epoch": 0.26, "grad_norm": 0.9906874791748633, "learning_rate": 9.734367338677355e-06, "loss": 0.6813, "step": 2058 }, { "epoch": 0.26, "grad_norm": 0.6833249408232183, "learning_rate": 9.734035473252786e-06, "loss": 0.5471, "step": 2059 }, { "epoch": 0.26, "grad_norm": 0.6003145554045849, "learning_rate": 9.733703406315455e-06, "loss": 0.5351, "step": 2060 }, { "epoch": 0.26, "grad_norm": 0.7307122835592621, "learning_rate": 9.733371137879498e-06, "loss": 0.5821, "step": 2061 }, { "epoch": 0.26, "grad_norm": 0.6831646478598287, "learning_rate": 9.733038667959054e-06, "loss": 0.5798, "step": 2062 }, { "epoch": 0.26, "grad_norm": 0.5258412523513408, "learning_rate": 9.73270599656828e-06, "loss": 0.4822, "step": 2063 }, { "epoch": 0.26, "grad_norm": 1.288210033872429, "learning_rate": 9.732373123721337e-06, "loss": 0.635, "step": 2064 }, { "epoch": 0.26, "grad_norm": 0.6161080538188768, "learning_rate": 9.732040049432393e-06, "loss": 0.5423, "step": 2065 }, { "epoch": 0.26, "grad_norm": 0.9489274096999086, "learning_rate": 9.731706773715624e-06, "loss": 0.6287, "step": 2066 }, { "epoch": 0.26, "grad_norm": 0.6266522673051259, "learning_rate": 9.731373296585218e-06, "loss": 0.5672, "step": 2067 }, { "epoch": 0.26, "grad_norm": 0.6733008292071735, "learning_rate": 9.73103961805537e-06, "loss": 0.6035, "step": 2068 }, { "epoch": 0.26, "grad_norm": 0.722691724097056, "learning_rate": 9.730705738140284e-06, "loss": 0.6052, "step": 2069 }, { "epoch": 0.26, "grad_norm": 0.5733938543934274, "learning_rate": 9.730371656854172e-06, "loss": 0.4776, "step": 2070 }, { "epoch": 0.26, "grad_norm": 0.620428591961551, "learning_rate": 9.730037374211255e-06, "loss": 0.5432, "step": 2071 }, { "epoch": 0.26, "grad_norm": 0.7010401930330775, "learning_rate": 9.729702890225761e-06, "loss": 0.5135, "step": 2072 }, { "epoch": 0.26, "grad_norm": 0.6290161986027752, "learning_rate": 9.729368204911928e-06, "loss": 0.5256, "step": 2073 }, { "epoch": 0.26, "grad_norm": 0.6798955498123993, "learning_rate": 9.729033318284005e-06, "loss": 0.5764, "step": 2074 }, { "epoch": 0.26, "grad_norm": 0.7569271787854982, "learning_rate": 9.728698230356246e-06, "loss": 0.5505, "step": 2075 }, { "epoch": 0.26, "grad_norm": 0.6169296838439209, "learning_rate": 9.728362941142913e-06, "loss": 0.5342, "step": 2076 }, { "epoch": 0.26, "grad_norm": 0.665497242148675, "learning_rate": 9.72802745065828e-06, "loss": 0.559, "step": 2077 }, { "epoch": 0.26, "grad_norm": 0.8047324862964884, "learning_rate": 9.727691758916627e-06, "loss": 0.6709, "step": 2078 }, { "epoch": 0.26, "grad_norm": 0.6871136990546536, "learning_rate": 9.727355865932242e-06, "loss": 0.5583, "step": 2079 }, { "epoch": 0.26, "grad_norm": 0.6454370635444046, "learning_rate": 9.727019771719427e-06, "loss": 0.522, "step": 2080 }, { "epoch": 0.27, "grad_norm": 0.6001209377809871, "learning_rate": 9.726683476292484e-06, "loss": 0.542, "step": 2081 }, { "epoch": 0.27, "grad_norm": 0.6933759361085206, "learning_rate": 9.72634697966573e-06, "loss": 0.5507, "step": 2082 }, { "epoch": 0.27, "grad_norm": 0.9055509903938554, "learning_rate": 9.726010281853488e-06, "loss": 0.6022, "step": 2083 }, { "epoch": 0.27, "grad_norm": 0.6116977883299418, "learning_rate": 9.725673382870092e-06, "loss": 0.5139, "step": 2084 }, { "epoch": 0.27, "grad_norm": 0.6370131943565843, "learning_rate": 9.725336282729877e-06, "loss": 0.5515, "step": 2085 }, { "epoch": 0.27, "grad_norm": 0.9004152023727671, "learning_rate": 9.7249989814472e-06, "loss": 0.6576, "step": 2086 }, { "epoch": 0.27, "grad_norm": 1.0158407119320836, "learning_rate": 9.724661479036414e-06, "loss": 0.5774, "step": 2087 }, { "epoch": 0.27, "grad_norm": 0.7675904043554551, "learning_rate": 9.724323775511888e-06, "loss": 0.6786, "step": 2088 }, { "epoch": 0.27, "grad_norm": 0.7489623594684662, "learning_rate": 9.723985870887995e-06, "loss": 0.5578, "step": 2089 }, { "epoch": 0.27, "grad_norm": 0.6084731557722258, "learning_rate": 9.723647765179119e-06, "loss": 0.5833, "step": 2090 }, { "epoch": 0.27, "grad_norm": 0.8331052745998051, "learning_rate": 9.723309458399652e-06, "loss": 0.6462, "step": 2091 }, { "epoch": 0.27, "grad_norm": 0.7359928436991852, "learning_rate": 9.722970950563995e-06, "loss": 0.5608, "step": 2092 }, { "epoch": 0.27, "grad_norm": 0.6837342295014889, "learning_rate": 9.722632241686559e-06, "loss": 0.5407, "step": 2093 }, { "epoch": 0.27, "grad_norm": 0.7729308964984413, "learning_rate": 9.722293331781758e-06, "loss": 0.5863, "step": 2094 }, { "epoch": 0.27, "grad_norm": 0.8614004223480878, "learning_rate": 9.721954220864019e-06, "loss": 0.6489, "step": 2095 }, { "epoch": 0.27, "grad_norm": 0.6232329294269511, "learning_rate": 9.721614908947781e-06, "loss": 0.5391, "step": 2096 }, { "epoch": 0.27, "grad_norm": 0.6077719243223584, "learning_rate": 9.721275396047483e-06, "loss": 0.5393, "step": 2097 }, { "epoch": 0.27, "grad_norm": 0.558351867007384, "learning_rate": 9.720935682177577e-06, "loss": 0.4888, "step": 2098 }, { "epoch": 0.27, "grad_norm": 0.8062406355126962, "learning_rate": 9.720595767352527e-06, "loss": 0.673, "step": 2099 }, { "epoch": 0.27, "grad_norm": 0.9037491752296137, "learning_rate": 9.720255651586799e-06, "loss": 0.6714, "step": 2100 }, { "epoch": 0.27, "grad_norm": 0.7360739154254519, "learning_rate": 9.719915334894871e-06, "loss": 0.6018, "step": 2101 }, { "epoch": 0.27, "grad_norm": 0.7831402654138032, "learning_rate": 9.71957481729123e-06, "loss": 0.5486, "step": 2102 }, { "epoch": 0.27, "grad_norm": 1.0765839399703987, "learning_rate": 9.719234098790374e-06, "loss": 0.5915, "step": 2103 }, { "epoch": 0.27, "grad_norm": 0.7966734872967898, "learning_rate": 9.718893179406798e-06, "loss": 0.61, "step": 2104 }, { "epoch": 0.27, "grad_norm": 0.971240792968848, "learning_rate": 9.718552059155022e-06, "loss": 0.6608, "step": 2105 }, { "epoch": 0.27, "grad_norm": 0.5970469956752246, "learning_rate": 9.718210738049563e-06, "loss": 0.4595, "step": 2106 }, { "epoch": 0.27, "grad_norm": 0.5920103825747084, "learning_rate": 9.71786921610495e-06, "loss": 0.5117, "step": 2107 }, { "epoch": 0.27, "grad_norm": 0.8117674785784921, "learning_rate": 9.71752749333572e-06, "loss": 0.6788, "step": 2108 }, { "epoch": 0.27, "grad_norm": 0.5256254889132973, "learning_rate": 9.717185569756419e-06, "loss": 0.46, "step": 2109 }, { "epoch": 0.27, "grad_norm": 0.5834368545417268, "learning_rate": 9.716843445381603e-06, "loss": 0.5359, "step": 2110 }, { "epoch": 0.27, "grad_norm": 1.0017053379236673, "learning_rate": 9.716501120225834e-06, "loss": 0.6183, "step": 2111 }, { "epoch": 0.27, "grad_norm": 0.6548945424440169, "learning_rate": 9.716158594303685e-06, "loss": 0.5459, "step": 2112 }, { "epoch": 0.27, "grad_norm": 2.23223843284866, "learning_rate": 9.715815867629735e-06, "loss": 0.6646, "step": 2113 }, { "epoch": 0.27, "grad_norm": 0.659091643792312, "learning_rate": 9.715472940218573e-06, "loss": 0.5551, "step": 2114 }, { "epoch": 0.27, "grad_norm": 0.6976834670146201, "learning_rate": 9.715129812084795e-06, "loss": 0.5258, "step": 2115 }, { "epoch": 0.27, "grad_norm": 0.8699304617573581, "learning_rate": 9.71478648324301e-06, "loss": 0.6653, "step": 2116 }, { "epoch": 0.27, "grad_norm": 0.8140126412568707, "learning_rate": 9.71444295370783e-06, "loss": 0.6282, "step": 2117 }, { "epoch": 0.27, "grad_norm": 0.5488027208872968, "learning_rate": 9.71409922349388e-06, "loss": 0.5297, "step": 2118 }, { "epoch": 0.27, "grad_norm": 0.7111858818212421, "learning_rate": 9.713755292615789e-06, "loss": 0.5571, "step": 2119 }, { "epoch": 0.27, "grad_norm": 0.8020520271094485, "learning_rate": 9.713411161088198e-06, "loss": 0.6039, "step": 2120 }, { "epoch": 0.27, "grad_norm": 0.7866973296861831, "learning_rate": 9.713066828925757e-06, "loss": 0.6567, "step": 2121 }, { "epoch": 0.27, "grad_norm": 0.642506004797298, "learning_rate": 9.712722296143121e-06, "loss": 0.5614, "step": 2122 }, { "epoch": 0.27, "grad_norm": 0.7503891770795013, "learning_rate": 9.712377562754957e-06, "loss": 0.6174, "step": 2123 }, { "epoch": 0.27, "grad_norm": 0.7500400246988823, "learning_rate": 9.712032628775939e-06, "loss": 0.5557, "step": 2124 }, { "epoch": 0.27, "grad_norm": 0.6542104740649382, "learning_rate": 9.711687494220748e-06, "loss": 0.5911, "step": 2125 }, { "epoch": 0.27, "grad_norm": 1.0160392181510352, "learning_rate": 9.711342159104078e-06, "loss": 0.6453, "step": 2126 }, { "epoch": 0.27, "grad_norm": 0.7892779950190293, "learning_rate": 9.710996623440627e-06, "loss": 0.5836, "step": 2127 }, { "epoch": 0.27, "grad_norm": 0.7363641448666912, "learning_rate": 9.710650887245103e-06, "loss": 0.5829, "step": 2128 }, { "epoch": 0.27, "grad_norm": 0.8431743530457255, "learning_rate": 9.710304950532225e-06, "loss": 0.5804, "step": 2129 }, { "epoch": 0.27, "grad_norm": 0.8824858658654475, "learning_rate": 9.709958813316718e-06, "loss": 0.6999, "step": 2130 }, { "epoch": 0.27, "grad_norm": 0.8998131890206996, "learning_rate": 9.709612475613315e-06, "loss": 0.6894, "step": 2131 }, { "epoch": 0.27, "grad_norm": 0.5910197690092174, "learning_rate": 9.709265937436758e-06, "loss": 0.5738, "step": 2132 }, { "epoch": 0.27, "grad_norm": 1.019065391040781, "learning_rate": 9.708919198801799e-06, "loss": 0.6415, "step": 2133 }, { "epoch": 0.27, "grad_norm": 0.6614502965096164, "learning_rate": 9.708572259723198e-06, "loss": 0.5642, "step": 2134 }, { "epoch": 0.27, "grad_norm": 0.8262117858413601, "learning_rate": 9.70822512021572e-06, "loss": 0.6035, "step": 2135 }, { "epoch": 0.27, "grad_norm": 0.9290103171899875, "learning_rate": 9.707877780294147e-06, "loss": 0.6671, "step": 2136 }, { "epoch": 0.27, "grad_norm": 0.72120369339109, "learning_rate": 9.707530239973257e-06, "loss": 0.5314, "step": 2137 }, { "epoch": 0.27, "grad_norm": 0.6497313724067862, "learning_rate": 9.707182499267851e-06, "loss": 0.5609, "step": 2138 }, { "epoch": 0.27, "grad_norm": 0.8843671026780996, "learning_rate": 9.706834558192728e-06, "loss": 0.6537, "step": 2139 }, { "epoch": 0.27, "grad_norm": 0.6927955833608418, "learning_rate": 9.706486416762696e-06, "loss": 0.5397, "step": 2140 }, { "epoch": 0.27, "grad_norm": 0.7035186658685634, "learning_rate": 9.706138074992581e-06, "loss": 0.625, "step": 2141 }, { "epoch": 0.27, "grad_norm": 0.6516548353517828, "learning_rate": 9.705789532897205e-06, "loss": 0.5345, "step": 2142 }, { "epoch": 0.27, "grad_norm": 0.6039563025733024, "learning_rate": 9.705440790491406e-06, "loss": 0.5231, "step": 2143 }, { "epoch": 0.27, "grad_norm": 0.7199901116167109, "learning_rate": 9.705091847790029e-06, "loss": 0.6729, "step": 2144 }, { "epoch": 0.27, "grad_norm": 0.8138226573688557, "learning_rate": 9.704742704807928e-06, "loss": 0.6693, "step": 2145 }, { "epoch": 0.27, "grad_norm": 0.8791064321004185, "learning_rate": 9.704393361559963e-06, "loss": 0.5696, "step": 2146 }, { "epoch": 0.27, "grad_norm": 0.7829314545257753, "learning_rate": 9.704043818061007e-06, "loss": 0.5571, "step": 2147 }, { "epoch": 0.27, "grad_norm": 0.9699664155305645, "learning_rate": 9.703694074325935e-06, "loss": 0.6281, "step": 2148 }, { "epoch": 0.27, "grad_norm": 0.8313151676250713, "learning_rate": 9.70334413036964e-06, "loss": 0.6658, "step": 2149 }, { "epoch": 0.27, "grad_norm": 0.6751066000361966, "learning_rate": 9.702993986207014e-06, "loss": 0.5604, "step": 2150 }, { "epoch": 0.27, "grad_norm": 0.7255453828137651, "learning_rate": 9.702643641852963e-06, "loss": 0.5996, "step": 2151 }, { "epoch": 0.27, "grad_norm": 0.6983987210473735, "learning_rate": 9.7022930973224e-06, "loss": 0.5415, "step": 2152 }, { "epoch": 0.27, "grad_norm": 0.936193879001611, "learning_rate": 9.701942352630246e-06, "loss": 0.6409, "step": 2153 }, { "epoch": 0.27, "grad_norm": 0.912956042654097, "learning_rate": 9.701591407791431e-06, "loss": 0.6389, "step": 2154 }, { "epoch": 0.27, "grad_norm": 0.7230062589302516, "learning_rate": 9.701240262820894e-06, "loss": 0.5882, "step": 2155 }, { "epoch": 0.27, "grad_norm": 0.6272954988725057, "learning_rate": 9.700888917733582e-06, "loss": 0.5692, "step": 2156 }, { "epoch": 0.27, "grad_norm": 1.322684260098865, "learning_rate": 9.70053737254445e-06, "loss": 0.6119, "step": 2157 }, { "epoch": 0.27, "grad_norm": 0.6771476018489737, "learning_rate": 9.700185627268463e-06, "loss": 0.5113, "step": 2158 }, { "epoch": 0.28, "grad_norm": 0.6986586269348238, "learning_rate": 9.699833681920595e-06, "loss": 0.5369, "step": 2159 }, { "epoch": 0.28, "grad_norm": 0.8707171763834329, "learning_rate": 9.699481536515824e-06, "loss": 0.6593, "step": 2160 }, { "epoch": 0.28, "grad_norm": 1.0172857832492763, "learning_rate": 9.699129191069141e-06, "loss": 0.6357, "step": 2161 }, { "epoch": 0.28, "grad_norm": 0.7348842636230057, "learning_rate": 9.698776645595546e-06, "loss": 0.6248, "step": 2162 }, { "epoch": 0.28, "grad_norm": 0.798755867497702, "learning_rate": 9.698423900110046e-06, "loss": 0.6579, "step": 2163 }, { "epoch": 0.28, "grad_norm": 1.1928872004184816, "learning_rate": 9.698070954627652e-06, "loss": 0.664, "step": 2164 }, { "epoch": 0.28, "grad_norm": 0.7014593433875881, "learning_rate": 9.69771780916339e-06, "loss": 0.5624, "step": 2165 }, { "epoch": 0.28, "grad_norm": 0.767277100934089, "learning_rate": 9.697364463732293e-06, "loss": 0.6384, "step": 2166 }, { "epoch": 0.28, "grad_norm": 0.9248346711069855, "learning_rate": 9.697010918349402e-06, "loss": 0.6596, "step": 2167 }, { "epoch": 0.28, "grad_norm": 0.6387516173536527, "learning_rate": 9.696657173029767e-06, "loss": 0.5607, "step": 2168 }, { "epoch": 0.28, "grad_norm": 0.8047509435201944, "learning_rate": 9.696303227788441e-06, "loss": 0.6464, "step": 2169 }, { "epoch": 0.28, "grad_norm": 0.6238519105509818, "learning_rate": 9.695949082640497e-06, "loss": 0.5011, "step": 2170 }, { "epoch": 0.28, "grad_norm": 0.7983154023591998, "learning_rate": 9.695594737601006e-06, "loss": 0.5157, "step": 2171 }, { "epoch": 0.28, "grad_norm": 0.948630607940082, "learning_rate": 9.695240192685051e-06, "loss": 0.6251, "step": 2172 }, { "epoch": 0.28, "grad_norm": 0.5868660188042967, "learning_rate": 9.694885447907726e-06, "loss": 0.5212, "step": 2173 }, { "epoch": 0.28, "grad_norm": 0.7581979463662551, "learning_rate": 9.69453050328413e-06, "loss": 0.6261, "step": 2174 }, { "epoch": 0.28, "grad_norm": 0.7541088329169049, "learning_rate": 9.694175358829372e-06, "loss": 0.6103, "step": 2175 }, { "epoch": 0.28, "grad_norm": 0.5998178387441944, "learning_rate": 9.693820014558568e-06, "loss": 0.5542, "step": 2176 }, { "epoch": 0.28, "grad_norm": 0.7863609600049947, "learning_rate": 9.693464470486847e-06, "loss": 0.6182, "step": 2177 }, { "epoch": 0.28, "grad_norm": 0.6913994240424528, "learning_rate": 9.69310872662934e-06, "loss": 0.6005, "step": 2178 }, { "epoch": 0.28, "grad_norm": 0.9020903523450176, "learning_rate": 9.692752783001194e-06, "loss": 0.6919, "step": 2179 }, { "epoch": 0.28, "grad_norm": 0.8009512130787827, "learning_rate": 9.692396639617556e-06, "loss": 0.6028, "step": 2180 }, { "epoch": 0.28, "grad_norm": 0.5603008308784059, "learning_rate": 9.692040296493587e-06, "loss": 0.5243, "step": 2181 }, { "epoch": 0.28, "grad_norm": 0.804830335848704, "learning_rate": 9.691683753644459e-06, "loss": 0.5833, "step": 2182 }, { "epoch": 0.28, "grad_norm": 0.7136042371926369, "learning_rate": 9.691327011085344e-06, "loss": 0.5521, "step": 2183 }, { "epoch": 0.28, "grad_norm": 0.7516852237198544, "learning_rate": 9.690970068831431e-06, "loss": 0.6193, "step": 2184 }, { "epoch": 0.28, "grad_norm": 0.6088908958862602, "learning_rate": 9.690612926897911e-06, "loss": 0.5562, "step": 2185 }, { "epoch": 0.28, "grad_norm": 0.7406939704811897, "learning_rate": 9.690255585299988e-06, "loss": 0.6627, "step": 2186 }, { "epoch": 0.28, "grad_norm": 0.9702544059608751, "learning_rate": 9.689898044052872e-06, "loss": 0.6357, "step": 2187 }, { "epoch": 0.28, "grad_norm": 0.6700948045728238, "learning_rate": 9.689540303171785e-06, "loss": 0.5431, "step": 2188 }, { "epoch": 0.28, "grad_norm": 0.7436438109257585, "learning_rate": 9.689182362671952e-06, "loss": 0.6278, "step": 2189 }, { "epoch": 0.28, "grad_norm": 0.7576916423630417, "learning_rate": 9.688824222568608e-06, "loss": 0.6341, "step": 2190 }, { "epoch": 0.28, "grad_norm": 0.6391553504785336, "learning_rate": 9.688465882877005e-06, "loss": 0.4978, "step": 2191 }, { "epoch": 0.28, "grad_norm": 1.0998597734639064, "learning_rate": 9.688107343612387e-06, "loss": 0.6933, "step": 2192 }, { "epoch": 0.28, "grad_norm": 0.6994534113107235, "learning_rate": 9.687748604790024e-06, "loss": 0.5918, "step": 2193 }, { "epoch": 0.28, "grad_norm": 0.7935273500053455, "learning_rate": 9.68738966642518e-06, "loss": 0.5797, "step": 2194 }, { "epoch": 0.28, "grad_norm": 0.6313576338974369, "learning_rate": 9.687030528533137e-06, "loss": 0.5135, "step": 2195 }, { "epoch": 0.28, "grad_norm": 0.58855144835993, "learning_rate": 9.686671191129184e-06, "loss": 0.564, "step": 2196 }, { "epoch": 0.28, "grad_norm": 0.6397217492402749, "learning_rate": 9.686311654228613e-06, "loss": 0.5565, "step": 2197 }, { "epoch": 0.28, "grad_norm": 0.6257870798373377, "learning_rate": 9.68595191784673e-06, "loss": 0.5277, "step": 2198 }, { "epoch": 0.28, "grad_norm": 0.7337667986495767, "learning_rate": 9.68559198199885e-06, "loss": 0.6309, "step": 2199 }, { "epoch": 0.28, "grad_norm": 0.5983308697317115, "learning_rate": 9.685231846700292e-06, "loss": 0.5012, "step": 2200 }, { "epoch": 0.28, "grad_norm": 0.6799392679002891, "learning_rate": 9.684871511966383e-06, "loss": 0.5181, "step": 2201 }, { "epoch": 0.28, "grad_norm": 0.6914395568735426, "learning_rate": 9.684510977812467e-06, "loss": 0.5455, "step": 2202 }, { "epoch": 0.28, "grad_norm": 0.6238811969387991, "learning_rate": 9.68415024425389e-06, "loss": 0.5432, "step": 2203 }, { "epoch": 0.28, "grad_norm": 0.6340758968912651, "learning_rate": 9.683789311306003e-06, "loss": 0.5409, "step": 2204 }, { "epoch": 0.28, "grad_norm": 0.6886613532073099, "learning_rate": 9.683428178984172e-06, "loss": 0.5815, "step": 2205 }, { "epoch": 0.28, "grad_norm": 0.6442929205541311, "learning_rate": 9.68306684730377e-06, "loss": 0.5504, "step": 2206 }, { "epoch": 0.28, "grad_norm": 0.8588064591456216, "learning_rate": 9.682705316280178e-06, "loss": 0.6147, "step": 2207 }, { "epoch": 0.28, "grad_norm": 0.8804960211736516, "learning_rate": 9.682343585928785e-06, "loss": 0.6474, "step": 2208 }, { "epoch": 0.28, "grad_norm": 0.9515813480598867, "learning_rate": 9.681981656264984e-06, "loss": 0.6407, "step": 2209 }, { "epoch": 0.28, "grad_norm": 0.5990587431565297, "learning_rate": 9.68161952730419e-06, "loss": 0.5491, "step": 2210 }, { "epoch": 0.28, "grad_norm": 0.6515404033001146, "learning_rate": 9.681257199061812e-06, "loss": 0.5397, "step": 2211 }, { "epoch": 0.28, "grad_norm": 0.5903850281832909, "learning_rate": 9.680894671553273e-06, "loss": 0.5919, "step": 2212 }, { "epoch": 0.28, "grad_norm": 0.7799265947403901, "learning_rate": 9.680531944794007e-06, "loss": 0.6372, "step": 2213 }, { "epoch": 0.28, "grad_norm": 0.9926507262328429, "learning_rate": 9.680169018799452e-06, "loss": 0.5947, "step": 2214 }, { "epoch": 0.28, "grad_norm": 0.6339056599829482, "learning_rate": 9.679805893585059e-06, "loss": 0.4961, "step": 2215 }, { "epoch": 0.28, "grad_norm": 0.6551315532687673, "learning_rate": 9.679442569166282e-06, "loss": 0.5736, "step": 2216 }, { "epoch": 0.28, "grad_norm": 0.6300361657396402, "learning_rate": 9.67907904555859e-06, "loss": 0.5434, "step": 2217 }, { "epoch": 0.28, "grad_norm": 0.9418706930656059, "learning_rate": 9.678715322777453e-06, "loss": 0.6208, "step": 2218 }, { "epoch": 0.28, "grad_norm": 0.6543659916555796, "learning_rate": 9.678351400838357e-06, "loss": 0.5593, "step": 2219 }, { "epoch": 0.28, "grad_norm": 0.8837889611829904, "learning_rate": 9.67798727975679e-06, "loss": 0.7165, "step": 2220 }, { "epoch": 0.28, "grad_norm": 0.8815426881808548, "learning_rate": 9.677622959548256e-06, "loss": 0.6614, "step": 2221 }, { "epoch": 0.28, "grad_norm": 0.6174900120372893, "learning_rate": 9.677258440228259e-06, "loss": 0.5333, "step": 2222 }, { "epoch": 0.28, "grad_norm": 0.8184534973285272, "learning_rate": 9.676893721812317e-06, "loss": 0.6351, "step": 2223 }, { "epoch": 0.28, "grad_norm": 0.6330019368009169, "learning_rate": 9.676528804315954e-06, "loss": 0.5142, "step": 2224 }, { "epoch": 0.28, "grad_norm": 0.8743920706670454, "learning_rate": 9.676163687754705e-06, "loss": 0.6303, "step": 2225 }, { "epoch": 0.28, "grad_norm": 0.6326861851605023, "learning_rate": 9.675798372144108e-06, "loss": 0.5446, "step": 2226 }, { "epoch": 0.28, "grad_norm": 1.0371082516296481, "learning_rate": 9.675432857499718e-06, "loss": 0.5686, "step": 2227 }, { "epoch": 0.28, "grad_norm": 0.836242150354895, "learning_rate": 9.675067143837092e-06, "loss": 0.642, "step": 2228 }, { "epoch": 0.28, "grad_norm": 0.8301236096375209, "learning_rate": 9.674701231171795e-06, "loss": 0.5979, "step": 2229 }, { "epoch": 0.28, "grad_norm": 2.1578010235586267, "learning_rate": 9.674335119519407e-06, "loss": 0.6818, "step": 2230 }, { "epoch": 0.28, "grad_norm": 0.9290913461612924, "learning_rate": 9.673968808895509e-06, "loss": 0.6093, "step": 2231 }, { "epoch": 0.28, "grad_norm": 0.7342519929147039, "learning_rate": 9.673602299315694e-06, "loss": 0.596, "step": 2232 }, { "epoch": 0.28, "grad_norm": 0.6525783827942293, "learning_rate": 9.673235590795565e-06, "loss": 0.5375, "step": 2233 }, { "epoch": 0.28, "grad_norm": 0.5989832941113682, "learning_rate": 9.672868683350731e-06, "loss": 0.5109, "step": 2234 }, { "epoch": 0.28, "grad_norm": 0.6569948231952824, "learning_rate": 9.67250157699681e-06, "loss": 0.5465, "step": 2235 }, { "epoch": 0.28, "grad_norm": 0.6724503094389964, "learning_rate": 9.672134271749425e-06, "loss": 0.5342, "step": 2236 }, { "epoch": 0.28, "grad_norm": 1.0433338392795097, "learning_rate": 9.671766767624215e-06, "loss": 0.659, "step": 2237 }, { "epoch": 0.29, "grad_norm": 0.6852954531542389, "learning_rate": 9.671399064636824e-06, "loss": 0.5177, "step": 2238 }, { "epoch": 0.29, "grad_norm": 0.7620657504708801, "learning_rate": 9.671031162802901e-06, "loss": 0.6211, "step": 2239 }, { "epoch": 0.29, "grad_norm": 0.7714690157358612, "learning_rate": 9.670663062138111e-06, "loss": 0.5459, "step": 2240 }, { "epoch": 0.29, "grad_norm": 0.6553942130009848, "learning_rate": 9.670294762658116e-06, "loss": 0.546, "step": 2241 }, { "epoch": 0.29, "grad_norm": 0.62564941691109, "learning_rate": 9.669926264378598e-06, "loss": 0.556, "step": 2242 }, { "epoch": 0.29, "grad_norm": 0.5762097403614382, "learning_rate": 9.669557567315242e-06, "loss": 0.5195, "step": 2243 }, { "epoch": 0.29, "grad_norm": 0.8532764287259377, "learning_rate": 9.669188671483742e-06, "loss": 0.6213, "step": 2244 }, { "epoch": 0.29, "grad_norm": 0.7489377850365356, "learning_rate": 9.668819576899802e-06, "loss": 0.5236, "step": 2245 }, { "epoch": 0.29, "grad_norm": 0.7617188167186214, "learning_rate": 9.668450283579132e-06, "loss": 0.6345, "step": 2246 }, { "epoch": 0.29, "grad_norm": 0.851474713301415, "learning_rate": 9.668080791537451e-06, "loss": 0.6059, "step": 2247 }, { "epoch": 0.29, "grad_norm": 0.7362806713736375, "learning_rate": 9.667711100790487e-06, "loss": 0.5474, "step": 2248 }, { "epoch": 0.29, "grad_norm": 0.6993209279475131, "learning_rate": 9.667341211353979e-06, "loss": 0.5467, "step": 2249 }, { "epoch": 0.29, "grad_norm": 0.6047618263812696, "learning_rate": 9.66697112324367e-06, "loss": 0.5799, "step": 2250 }, { "epoch": 0.29, "grad_norm": 0.6760460559516863, "learning_rate": 9.666600836475313e-06, "loss": 0.5667, "step": 2251 }, { "epoch": 0.29, "grad_norm": 0.8904894784357311, "learning_rate": 9.66623035106467e-06, "loss": 0.6271, "step": 2252 }, { "epoch": 0.29, "grad_norm": 0.5959552961352129, "learning_rate": 9.665859667027514e-06, "loss": 0.5595, "step": 2253 }, { "epoch": 0.29, "grad_norm": 0.5915119580046773, "learning_rate": 9.665488784379619e-06, "loss": 0.5449, "step": 2254 }, { "epoch": 0.29, "grad_norm": 0.7050973527303205, "learning_rate": 9.665117703136778e-06, "loss": 0.5475, "step": 2255 }, { "epoch": 0.29, "grad_norm": 0.5741843191594764, "learning_rate": 9.664746423314783e-06, "loss": 0.4807, "step": 2256 }, { "epoch": 0.29, "grad_norm": 0.605516106318389, "learning_rate": 9.66437494492944e-06, "loss": 0.4949, "step": 2257 }, { "epoch": 0.29, "grad_norm": 0.7298421166590036, "learning_rate": 9.66400326799656e-06, "loss": 0.6303, "step": 2258 }, { "epoch": 0.29, "grad_norm": 0.8276973612240265, "learning_rate": 9.663631392531964e-06, "loss": 0.5599, "step": 2259 }, { "epoch": 0.29, "grad_norm": 0.7930071135800971, "learning_rate": 9.663259318551484e-06, "loss": 0.5948, "step": 2260 }, { "epoch": 0.29, "grad_norm": 0.8995766276937424, "learning_rate": 9.662887046070955e-06, "loss": 0.5952, "step": 2261 }, { "epoch": 0.29, "grad_norm": 0.6427601142691367, "learning_rate": 9.662514575106226e-06, "loss": 0.5744, "step": 2262 }, { "epoch": 0.29, "grad_norm": 0.828253738518374, "learning_rate": 9.66214190567315e-06, "loss": 0.6222, "step": 2263 }, { "epoch": 0.29, "grad_norm": 0.7720476783409141, "learning_rate": 9.661769037787593e-06, "loss": 0.5892, "step": 2264 }, { "epoch": 0.29, "grad_norm": 0.5936003716856899, "learning_rate": 9.661395971465425e-06, "loss": 0.4931, "step": 2265 }, { "epoch": 0.29, "grad_norm": 0.6928100126960826, "learning_rate": 9.661022706722522e-06, "loss": 0.5143, "step": 2266 }, { "epoch": 0.29, "grad_norm": 0.6655182767575827, "learning_rate": 9.66064924357478e-06, "loss": 0.5259, "step": 2267 }, { "epoch": 0.29, "grad_norm": 0.7407844825951722, "learning_rate": 9.660275582038095e-06, "loss": 0.5317, "step": 2268 }, { "epoch": 0.29, "grad_norm": 0.7158721229727145, "learning_rate": 9.659901722128366e-06, "loss": 0.5526, "step": 2269 }, { "epoch": 0.29, "grad_norm": 0.7424287306296785, "learning_rate": 9.659527663861513e-06, "loss": 0.6317, "step": 2270 }, { "epoch": 0.29, "grad_norm": 1.107374494985659, "learning_rate": 9.65915340725346e-06, "loss": 0.6258, "step": 2271 }, { "epoch": 0.29, "grad_norm": 0.6830529175884509, "learning_rate": 9.658778952320133e-06, "loss": 0.6194, "step": 2272 }, { "epoch": 0.29, "grad_norm": 0.6713118487544459, "learning_rate": 9.658404299077472e-06, "loss": 0.5113, "step": 2273 }, { "epoch": 0.29, "grad_norm": 0.8508721732925933, "learning_rate": 9.658029447541429e-06, "loss": 0.5819, "step": 2274 }, { "epoch": 0.29, "grad_norm": 0.6838730161706225, "learning_rate": 9.657654397727956e-06, "loss": 0.5601, "step": 2275 }, { "epoch": 0.29, "grad_norm": 0.7911712009038191, "learning_rate": 9.657279149653018e-06, "loss": 0.5822, "step": 2276 }, { "epoch": 0.29, "grad_norm": 0.776451702863992, "learning_rate": 9.65690370333259e-06, "loss": 0.6277, "step": 2277 }, { "epoch": 0.29, "grad_norm": 0.8645971786151312, "learning_rate": 9.656528058782653e-06, "loss": 0.6251, "step": 2278 }, { "epoch": 0.29, "grad_norm": 0.8527749737790213, "learning_rate": 9.656152216019197e-06, "loss": 0.6788, "step": 2279 }, { "epoch": 0.29, "grad_norm": 0.8828743894008848, "learning_rate": 9.655776175058218e-06, "loss": 0.6028, "step": 2280 }, { "epoch": 0.29, "grad_norm": 0.823956024995578, "learning_rate": 9.655399935915728e-06, "loss": 0.6078, "step": 2281 }, { "epoch": 0.29, "grad_norm": 0.925549441991507, "learning_rate": 9.655023498607736e-06, "loss": 0.6605, "step": 2282 }, { "epoch": 0.29, "grad_norm": 0.6801753009251831, "learning_rate": 9.654646863150271e-06, "loss": 0.568, "step": 2283 }, { "epoch": 0.29, "grad_norm": 0.7717913263725459, "learning_rate": 9.654270029559362e-06, "loss": 0.6152, "step": 2284 }, { "epoch": 0.29, "grad_norm": 0.8557536389595195, "learning_rate": 9.653892997851052e-06, "loss": 0.6703, "step": 2285 }, { "epoch": 0.29, "grad_norm": 0.8793890657596471, "learning_rate": 9.653515768041388e-06, "loss": 0.6016, "step": 2286 }, { "epoch": 0.29, "grad_norm": 0.6443234170989017, "learning_rate": 9.653138340146429e-06, "loss": 0.5274, "step": 2287 }, { "epoch": 0.29, "grad_norm": 0.8172794242502196, "learning_rate": 9.65276071418224e-06, "loss": 0.5886, "step": 2288 }, { "epoch": 0.29, "grad_norm": 0.686034579986197, "learning_rate": 9.652382890164895e-06, "loss": 0.5846, "step": 2289 }, { "epoch": 0.29, "grad_norm": 0.8096073641913435, "learning_rate": 9.652004868110477e-06, "loss": 0.5808, "step": 2290 }, { "epoch": 0.29, "grad_norm": 0.624890273928074, "learning_rate": 9.65162664803508e-06, "loss": 0.5363, "step": 2291 }, { "epoch": 0.29, "grad_norm": 0.6749908950627436, "learning_rate": 9.651248229954798e-06, "loss": 0.6828, "step": 2292 }, { "epoch": 0.29, "grad_norm": 0.8880591936931553, "learning_rate": 9.650869613885742e-06, "loss": 0.6491, "step": 2293 }, { "epoch": 0.29, "grad_norm": 0.8159750356945049, "learning_rate": 9.65049079984403e-06, "loss": 0.6944, "step": 2294 }, { "epoch": 0.29, "grad_norm": 0.7418131176842386, "learning_rate": 9.650111787845784e-06, "loss": 0.6, "step": 2295 }, { "epoch": 0.29, "grad_norm": 1.0461700823998974, "learning_rate": 9.64973257790714e-06, "loss": 0.5587, "step": 2296 }, { "epoch": 0.29, "grad_norm": 0.6101097244691224, "learning_rate": 9.64935317004424e-06, "loss": 0.5083, "step": 2297 }, { "epoch": 0.29, "grad_norm": 1.275132176881301, "learning_rate": 9.648973564273232e-06, "loss": 0.5649, "step": 2298 }, { "epoch": 0.29, "grad_norm": 0.7032765826619104, "learning_rate": 9.648593760610274e-06, "loss": 0.6405, "step": 2299 }, { "epoch": 0.29, "grad_norm": 0.6759865672860612, "learning_rate": 9.648213759071536e-06, "loss": 0.4923, "step": 2300 }, { "epoch": 0.29, "grad_norm": 0.8516325001087683, "learning_rate": 9.64783355967319e-06, "loss": 0.5733, "step": 2301 }, { "epoch": 0.29, "grad_norm": 1.0391152789607287, "learning_rate": 9.647453162431422e-06, "loss": 0.6847, "step": 2302 }, { "epoch": 0.29, "grad_norm": 0.7283393465124091, "learning_rate": 9.647072567362426e-06, "loss": 0.6249, "step": 2303 }, { "epoch": 0.29, "grad_norm": 0.5795741497487102, "learning_rate": 9.6466917744824e-06, "loss": 0.548, "step": 2304 }, { "epoch": 0.29, "grad_norm": 0.6297529108765231, "learning_rate": 9.646310783807552e-06, "loss": 0.5269, "step": 2305 }, { "epoch": 0.29, "grad_norm": 0.7688890632750403, "learning_rate": 9.645929595354101e-06, "loss": 0.583, "step": 2306 }, { "epoch": 0.29, "grad_norm": 0.5647579057091476, "learning_rate": 9.645548209138277e-06, "loss": 0.4726, "step": 2307 }, { "epoch": 0.29, "grad_norm": 0.6696597283297824, "learning_rate": 9.645166625176308e-06, "loss": 0.5587, "step": 2308 }, { "epoch": 0.29, "grad_norm": 0.5912881255480464, "learning_rate": 9.64478484348444e-06, "loss": 0.5827, "step": 2309 }, { "epoch": 0.29, "grad_norm": 0.9013934620363223, "learning_rate": 9.644402864078925e-06, "loss": 0.6017, "step": 2310 }, { "epoch": 0.29, "grad_norm": 0.8091548399589423, "learning_rate": 9.644020686976022e-06, "loss": 0.5655, "step": 2311 }, { "epoch": 0.29, "grad_norm": 0.6815593571986776, "learning_rate": 9.643638312191996e-06, "loss": 0.5179, "step": 2312 }, { "epoch": 0.29, "grad_norm": 0.6353061674175475, "learning_rate": 9.643255739743128e-06, "loss": 0.5628, "step": 2313 }, { "epoch": 0.29, "grad_norm": 0.5980690191909007, "learning_rate": 9.6428729696457e-06, "loss": 0.5004, "step": 2314 }, { "epoch": 0.29, "grad_norm": 0.5907665810638241, "learning_rate": 9.642490001916004e-06, "loss": 0.584, "step": 2315 }, { "epoch": 0.3, "grad_norm": 0.6795787447301739, "learning_rate": 9.642106836570349e-06, "loss": 0.5571, "step": 2316 }, { "epoch": 0.3, "grad_norm": 0.6581148947684329, "learning_rate": 9.641723473625036e-06, "loss": 0.5441, "step": 2317 }, { "epoch": 0.3, "grad_norm": 0.6025978028439085, "learning_rate": 9.64133991309639e-06, "loss": 0.5222, "step": 2318 }, { "epoch": 0.3, "grad_norm": 0.6102422844649658, "learning_rate": 9.640956155000734e-06, "loss": 0.4969, "step": 2319 }, { "epoch": 0.3, "grad_norm": 0.5951995875521914, "learning_rate": 9.640572199354404e-06, "loss": 0.4884, "step": 2320 }, { "epoch": 0.3, "grad_norm": 0.8117996206937678, "learning_rate": 9.640188046173746e-06, "loss": 0.6194, "step": 2321 }, { "epoch": 0.3, "grad_norm": 0.569777508360132, "learning_rate": 9.639803695475111e-06, "loss": 0.501, "step": 2322 }, { "epoch": 0.3, "grad_norm": 0.7353086269939838, "learning_rate": 9.639419147274858e-06, "loss": 0.5792, "step": 2323 }, { "epoch": 0.3, "grad_norm": 0.8305030844985611, "learning_rate": 9.639034401589359e-06, "loss": 0.6425, "step": 2324 }, { "epoch": 0.3, "grad_norm": 0.6875012573272089, "learning_rate": 9.638649458434989e-06, "loss": 0.4913, "step": 2325 }, { "epoch": 0.3, "grad_norm": 0.6104260420512554, "learning_rate": 9.638264317828135e-06, "loss": 0.5186, "step": 2326 }, { "epoch": 0.3, "grad_norm": 0.8513012735792077, "learning_rate": 9.63787897978519e-06, "loss": 0.6214, "step": 2327 }, { "epoch": 0.3, "grad_norm": 0.8140359431336607, "learning_rate": 9.637493444322557e-06, "loss": 0.6138, "step": 2328 }, { "epoch": 0.3, "grad_norm": 0.9359810761278444, "learning_rate": 9.63710771145665e-06, "loss": 0.5982, "step": 2329 }, { "epoch": 0.3, "grad_norm": 0.7033344549949977, "learning_rate": 9.636721781203882e-06, "loss": 0.5665, "step": 2330 }, { "epoch": 0.3, "grad_norm": 0.712401301102582, "learning_rate": 9.636335653580687e-06, "loss": 0.5778, "step": 2331 }, { "epoch": 0.3, "grad_norm": 0.6295356997241944, "learning_rate": 9.635949328603499e-06, "loss": 0.5391, "step": 2332 }, { "epoch": 0.3, "grad_norm": 0.7082610788110011, "learning_rate": 9.635562806288763e-06, "loss": 0.6003, "step": 2333 }, { "epoch": 0.3, "grad_norm": 0.5721241947970244, "learning_rate": 9.635176086652929e-06, "loss": 0.5039, "step": 2334 }, { "epoch": 0.3, "grad_norm": 2.3745115336996983, "learning_rate": 9.634789169712461e-06, "loss": 0.6207, "step": 2335 }, { "epoch": 0.3, "grad_norm": 0.9141369306134102, "learning_rate": 9.634402055483832e-06, "loss": 0.608, "step": 2336 }, { "epoch": 0.3, "grad_norm": 0.6391256201758749, "learning_rate": 9.634014743983513e-06, "loss": 0.548, "step": 2337 }, { "epoch": 0.3, "grad_norm": 0.6602876220896754, "learning_rate": 9.633627235227998e-06, "loss": 0.5919, "step": 2338 }, { "epoch": 0.3, "grad_norm": 0.7155167772219896, "learning_rate": 9.633239529233776e-06, "loss": 0.4985, "step": 2339 }, { "epoch": 0.3, "grad_norm": 0.7546479779069812, "learning_rate": 9.632851626017355e-06, "loss": 0.6248, "step": 2340 }, { "epoch": 0.3, "grad_norm": 0.9036446272415479, "learning_rate": 9.632463525595243e-06, "loss": 0.6682, "step": 2341 }, { "epoch": 0.3, "grad_norm": 0.692489419858691, "learning_rate": 9.632075227983963e-06, "loss": 0.5552, "step": 2342 }, { "epoch": 0.3, "grad_norm": 0.6542026424178569, "learning_rate": 9.63168673320004e-06, "loss": 0.5592, "step": 2343 }, { "epoch": 0.3, "grad_norm": 0.9882606063086841, "learning_rate": 9.631298041260018e-06, "loss": 0.6924, "step": 2344 }, { "epoch": 0.3, "grad_norm": 1.102230759389261, "learning_rate": 9.630909152180434e-06, "loss": 0.6162, "step": 2345 }, { "epoch": 0.3, "grad_norm": 0.7644577021931153, "learning_rate": 9.63052006597785e-06, "loss": 0.5817, "step": 2346 }, { "epoch": 0.3, "grad_norm": 0.716617409334602, "learning_rate": 9.630130782668818e-06, "loss": 0.5892, "step": 2347 }, { "epoch": 0.3, "grad_norm": 0.9729341324652461, "learning_rate": 9.629741302269918e-06, "loss": 0.6403, "step": 2348 }, { "epoch": 0.3, "grad_norm": 0.6853664457080397, "learning_rate": 9.629351624797725e-06, "loss": 0.6067, "step": 2349 }, { "epoch": 0.3, "grad_norm": 0.8342759285622677, "learning_rate": 9.628961750268825e-06, "loss": 0.632, "step": 2350 }, { "epoch": 0.3, "grad_norm": 0.7689119065965148, "learning_rate": 9.628571678699818e-06, "loss": 0.573, "step": 2351 }, { "epoch": 0.3, "grad_norm": 0.858385414084931, "learning_rate": 9.628181410107305e-06, "loss": 0.7072, "step": 2352 }, { "epoch": 0.3, "grad_norm": 0.7249149251190334, "learning_rate": 9.627790944507898e-06, "loss": 0.5645, "step": 2353 }, { "epoch": 0.3, "grad_norm": 0.6217419898343416, "learning_rate": 9.627400281918218e-06, "loss": 0.5539, "step": 2354 }, { "epoch": 0.3, "grad_norm": 0.7610052903814447, "learning_rate": 9.627009422354896e-06, "loss": 0.6443, "step": 2355 }, { "epoch": 0.3, "grad_norm": 0.9250303565161557, "learning_rate": 9.626618365834568e-06, "loss": 0.5487, "step": 2356 }, { "epoch": 0.3, "grad_norm": 0.8242894584329374, "learning_rate": 9.62622711237388e-06, "loss": 0.6333, "step": 2357 }, { "epoch": 0.3, "grad_norm": 0.8640955393357904, "learning_rate": 9.62583566198949e-06, "loss": 0.6112, "step": 2358 }, { "epoch": 0.3, "grad_norm": 0.5736128865275786, "learning_rate": 9.625444014698056e-06, "loss": 0.5134, "step": 2359 }, { "epoch": 0.3, "grad_norm": 0.8840592830174363, "learning_rate": 9.62505217051625e-06, "loss": 0.6405, "step": 2360 }, { "epoch": 0.3, "grad_norm": 0.782897049526451, "learning_rate": 9.624660129460756e-06, "loss": 0.6289, "step": 2361 }, { "epoch": 0.3, "grad_norm": 0.7876266615541588, "learning_rate": 9.624267891548257e-06, "loss": 0.6221, "step": 2362 }, { "epoch": 0.3, "grad_norm": 0.9185263520658862, "learning_rate": 9.62387545679545e-06, "loss": 0.691, "step": 2363 }, { "epoch": 0.3, "grad_norm": 0.6542667272945265, "learning_rate": 9.623482825219041e-06, "loss": 0.5247, "step": 2364 }, { "epoch": 0.3, "grad_norm": 0.6180807685369759, "learning_rate": 9.623089996835744e-06, "loss": 0.5468, "step": 2365 }, { "epoch": 0.3, "grad_norm": 0.713283927268299, "learning_rate": 9.622696971662278e-06, "loss": 0.6307, "step": 2366 }, { "epoch": 0.3, "grad_norm": 0.87003445307037, "learning_rate": 9.622303749715375e-06, "loss": 0.6911, "step": 2367 }, { "epoch": 0.3, "grad_norm": 0.94623684327798, "learning_rate": 9.621910331011769e-06, "loss": 0.6401, "step": 2368 }, { "epoch": 0.3, "grad_norm": 0.7600855773772919, "learning_rate": 9.621516715568212e-06, "loss": 0.5957, "step": 2369 }, { "epoch": 0.3, "grad_norm": 0.874117530399109, "learning_rate": 9.621122903401457e-06, "loss": 0.6094, "step": 2370 }, { "epoch": 0.3, "grad_norm": 0.8481734501936808, "learning_rate": 9.620728894528266e-06, "loss": 0.6145, "step": 2371 }, { "epoch": 0.3, "grad_norm": 0.7617368364319198, "learning_rate": 9.620334688965411e-06, "loss": 0.6298, "step": 2372 }, { "epoch": 0.3, "grad_norm": 0.7728250496801723, "learning_rate": 9.619940286729674e-06, "loss": 0.5981, "step": 2373 }, { "epoch": 0.3, "grad_norm": 0.6006432571031343, "learning_rate": 9.619545687837843e-06, "loss": 0.5815, "step": 2374 }, { "epoch": 0.3, "grad_norm": 0.7252849051821438, "learning_rate": 9.619150892306713e-06, "loss": 0.6159, "step": 2375 }, { "epoch": 0.3, "grad_norm": 0.6607678635799454, "learning_rate": 9.618755900153091e-06, "loss": 0.5698, "step": 2376 }, { "epoch": 0.3, "grad_norm": 0.594289771715513, "learning_rate": 9.618360711393789e-06, "loss": 0.5386, "step": 2377 }, { "epoch": 0.3, "grad_norm": 0.6907339889550254, "learning_rate": 9.61796532604563e-06, "loss": 0.57, "step": 2378 }, { "epoch": 0.3, "grad_norm": 0.6174250140212003, "learning_rate": 9.617569744125443e-06, "loss": 0.5633, "step": 2379 }, { "epoch": 0.3, "grad_norm": 0.8122324273378344, "learning_rate": 9.617173965650068e-06, "loss": 0.6107, "step": 2380 }, { "epoch": 0.3, "grad_norm": 1.092650696850718, "learning_rate": 9.616777990636353e-06, "loss": 0.6218, "step": 2381 }, { "epoch": 0.3, "grad_norm": 0.6128014853222221, "learning_rate": 9.616381819101151e-06, "loss": 0.5706, "step": 2382 }, { "epoch": 0.3, "grad_norm": 0.5657710730165172, "learning_rate": 9.615985451061327e-06, "loss": 0.4872, "step": 2383 }, { "epoch": 0.3, "grad_norm": 0.9024634261132601, "learning_rate": 9.615588886533753e-06, "loss": 0.6439, "step": 2384 }, { "epoch": 0.3, "grad_norm": 0.6598051106064764, "learning_rate": 9.615192125535308e-06, "loss": 0.5426, "step": 2385 }, { "epoch": 0.3, "grad_norm": 0.941218753522489, "learning_rate": 9.614795168082885e-06, "loss": 0.6302, "step": 2386 }, { "epoch": 0.3, "grad_norm": 0.6969140643920829, "learning_rate": 9.614398014193376e-06, "loss": 0.5691, "step": 2387 }, { "epoch": 0.3, "grad_norm": 0.6307159750972525, "learning_rate": 9.614000663883692e-06, "loss": 0.5168, "step": 2388 }, { "epoch": 0.3, "grad_norm": 0.7211489461599196, "learning_rate": 9.61360311717074e-06, "loss": 0.6418, "step": 2389 }, { "epoch": 0.3, "grad_norm": 0.6542520301640792, "learning_rate": 9.613205374071449e-06, "loss": 0.5681, "step": 2390 }, { "epoch": 0.3, "grad_norm": 0.7676464715721771, "learning_rate": 9.612807434602747e-06, "loss": 0.567, "step": 2391 }, { "epoch": 0.3, "grad_norm": 0.6810222458656829, "learning_rate": 9.612409298781571e-06, "loss": 0.5391, "step": 2392 }, { "epoch": 0.3, "grad_norm": 0.6421619680166654, "learning_rate": 9.612010966624871e-06, "loss": 0.5632, "step": 2393 }, { "epoch": 0.3, "grad_norm": 0.6334382080486921, "learning_rate": 9.611612438149604e-06, "loss": 0.5109, "step": 2394 }, { "epoch": 0.31, "grad_norm": 0.6000212034183585, "learning_rate": 9.61121371337273e-06, "loss": 0.5949, "step": 2395 }, { "epoch": 0.31, "grad_norm": 0.6461431312045264, "learning_rate": 9.610814792311223e-06, "loss": 0.5368, "step": 2396 }, { "epoch": 0.31, "grad_norm": 0.7603082067529316, "learning_rate": 9.610415674982066e-06, "loss": 0.6449, "step": 2397 }, { "epoch": 0.31, "grad_norm": 0.8991449463517307, "learning_rate": 9.610016361402246e-06, "loss": 0.6748, "step": 2398 }, { "epoch": 0.31, "grad_norm": 0.6349684387182754, "learning_rate": 9.60961685158876e-06, "loss": 0.5669, "step": 2399 }, { "epoch": 0.31, "grad_norm": 0.6248834326375421, "learning_rate": 9.609217145558617e-06, "loss": 0.5188, "step": 2400 }, { "epoch": 0.31, "grad_norm": 1.2104698116864234, "learning_rate": 9.608817243328827e-06, "loss": 0.7036, "step": 2401 }, { "epoch": 0.31, "grad_norm": 0.8889571647654392, "learning_rate": 9.608417144916417e-06, "loss": 0.6893, "step": 2402 }, { "epoch": 0.31, "grad_norm": 0.6054003487784059, "learning_rate": 9.608016850338413e-06, "loss": 0.5112, "step": 2403 }, { "epoch": 0.31, "grad_norm": 0.704592851394262, "learning_rate": 9.60761635961186e-06, "loss": 0.5493, "step": 2404 }, { "epoch": 0.31, "grad_norm": 0.551236892913067, "learning_rate": 9.607215672753799e-06, "loss": 0.4779, "step": 2405 }, { "epoch": 0.31, "grad_norm": 0.552987497017455, "learning_rate": 9.60681478978129e-06, "loss": 0.5193, "step": 2406 }, { "epoch": 0.31, "grad_norm": 0.5834679274972107, "learning_rate": 9.606413710711398e-06, "loss": 0.5557, "step": 2407 }, { "epoch": 0.31, "grad_norm": 0.8429905180044114, "learning_rate": 9.606012435561194e-06, "loss": 0.6261, "step": 2408 }, { "epoch": 0.31, "grad_norm": 0.5815754742957093, "learning_rate": 9.605610964347758e-06, "loss": 0.5468, "step": 2409 }, { "epoch": 0.31, "grad_norm": 0.5785602730013256, "learning_rate": 9.605209297088182e-06, "loss": 0.546, "step": 2410 }, { "epoch": 0.31, "grad_norm": 0.7059173564552337, "learning_rate": 9.604807433799563e-06, "loss": 0.583, "step": 2411 }, { "epoch": 0.31, "grad_norm": 0.5967318636854467, "learning_rate": 9.604405374499003e-06, "loss": 0.552, "step": 2412 }, { "epoch": 0.31, "grad_norm": 0.7672505433207766, "learning_rate": 9.604003119203624e-06, "loss": 0.5824, "step": 2413 }, { "epoch": 0.31, "grad_norm": 0.6569396385308869, "learning_rate": 9.603600667930542e-06, "loss": 0.5255, "step": 2414 }, { "epoch": 0.31, "grad_norm": 0.7784896520611584, "learning_rate": 9.603198020696892e-06, "loss": 0.5965, "step": 2415 }, { "epoch": 0.31, "grad_norm": 0.5673147912890423, "learning_rate": 9.60279517751981e-06, "loss": 0.497, "step": 2416 }, { "epoch": 0.31, "grad_norm": 0.6463402705649699, "learning_rate": 9.602392138416447e-06, "loss": 0.5272, "step": 2417 }, { "epoch": 0.31, "grad_norm": 0.6907065927904141, "learning_rate": 9.601988903403958e-06, "loss": 0.5938, "step": 2418 }, { "epoch": 0.31, "grad_norm": 0.8793334566508969, "learning_rate": 9.601585472499508e-06, "loss": 0.6642, "step": 2419 }, { "epoch": 0.31, "grad_norm": 0.7844761009137756, "learning_rate": 9.601181845720268e-06, "loss": 0.5851, "step": 2420 }, { "epoch": 0.31, "grad_norm": 0.67963826962421, "learning_rate": 9.60077802308342e-06, "loss": 0.5639, "step": 2421 }, { "epoch": 0.31, "grad_norm": 0.8693147114331978, "learning_rate": 9.600374004606153e-06, "loss": 0.5973, "step": 2422 }, { "epoch": 0.31, "grad_norm": 0.6078543400729967, "learning_rate": 9.599969790305667e-06, "loss": 0.5417, "step": 2423 }, { "epoch": 0.31, "grad_norm": 0.8805026740278471, "learning_rate": 9.599565380199164e-06, "loss": 0.6367, "step": 2424 }, { "epoch": 0.31, "grad_norm": 0.5795579343765204, "learning_rate": 9.599160774303863e-06, "loss": 0.505, "step": 2425 }, { "epoch": 0.31, "grad_norm": 0.8687021496402021, "learning_rate": 9.598755972636983e-06, "loss": 0.6368, "step": 2426 }, { "epoch": 0.31, "grad_norm": 0.7197724942923636, "learning_rate": 9.598350975215757e-06, "loss": 0.5701, "step": 2427 }, { "epoch": 0.31, "grad_norm": 0.723969890496662, "learning_rate": 9.597945782057427e-06, "loss": 0.6485, "step": 2428 }, { "epoch": 0.31, "grad_norm": 0.7725077900043601, "learning_rate": 9.597540393179235e-06, "loss": 0.6412, "step": 2429 }, { "epoch": 0.31, "grad_norm": 0.8068132619636262, "learning_rate": 9.59713480859844e-06, "loss": 0.7052, "step": 2430 }, { "epoch": 0.31, "grad_norm": 0.624133680104771, "learning_rate": 9.596729028332309e-06, "loss": 0.5699, "step": 2431 }, { "epoch": 0.31, "grad_norm": 0.7688880035065165, "learning_rate": 9.596323052398112e-06, "loss": 0.6272, "step": 2432 }, { "epoch": 0.31, "grad_norm": 0.8384046807259603, "learning_rate": 9.595916880813127e-06, "loss": 0.6034, "step": 2433 }, { "epoch": 0.31, "grad_norm": 0.769858270127807, "learning_rate": 9.59551051359465e-06, "loss": 0.5621, "step": 2434 }, { "epoch": 0.31, "grad_norm": 0.6400763534881783, "learning_rate": 9.595103950759974e-06, "loss": 0.5397, "step": 2435 }, { "epoch": 0.31, "grad_norm": 0.8557673589892321, "learning_rate": 9.594697192326408e-06, "loss": 0.5849, "step": 2436 }, { "epoch": 0.31, "grad_norm": 0.6461724804678096, "learning_rate": 9.594290238311264e-06, "loss": 0.5297, "step": 2437 }, { "epoch": 0.31, "grad_norm": 0.9378438918064688, "learning_rate": 9.593883088731866e-06, "loss": 0.6056, "step": 2438 }, { "epoch": 0.31, "grad_norm": 0.8683897724391, "learning_rate": 9.593475743605546e-06, "loss": 0.6301, "step": 2439 }, { "epoch": 0.31, "grad_norm": 0.8675075549901747, "learning_rate": 9.593068202949642e-06, "loss": 0.6534, "step": 2440 }, { "epoch": 0.31, "grad_norm": 0.6497677519019572, "learning_rate": 9.5926604667815e-06, "loss": 0.5513, "step": 2441 }, { "epoch": 0.31, "grad_norm": 0.6224714071357573, "learning_rate": 9.59225253511848e-06, "loss": 0.5239, "step": 2442 }, { "epoch": 0.31, "grad_norm": 0.8110296456715863, "learning_rate": 9.591844407977944e-06, "loss": 0.6376, "step": 2443 }, { "epoch": 0.31, "grad_norm": 0.5785413804034262, "learning_rate": 9.591436085377263e-06, "loss": 0.5523, "step": 2444 }, { "epoch": 0.31, "grad_norm": 0.9300910909493508, "learning_rate": 9.591027567333822e-06, "loss": 0.6671, "step": 2445 }, { "epoch": 0.31, "grad_norm": 0.7967115344511013, "learning_rate": 9.590618853865008e-06, "loss": 0.6202, "step": 2446 }, { "epoch": 0.31, "grad_norm": 0.7293896493512114, "learning_rate": 9.590209944988218e-06, "loss": 0.5719, "step": 2447 }, { "epoch": 0.31, "grad_norm": 0.6017138154585803, "learning_rate": 9.58980084072086e-06, "loss": 0.5371, "step": 2448 }, { "epoch": 0.31, "grad_norm": 0.6763495265701278, "learning_rate": 9.589391541080346e-06, "loss": 0.5398, "step": 2449 }, { "epoch": 0.31, "grad_norm": 0.633634695181765, "learning_rate": 9.588982046084101e-06, "loss": 0.5232, "step": 2450 }, { "epoch": 0.31, "grad_norm": 0.5540877292722046, "learning_rate": 9.588572355749555e-06, "loss": 0.5284, "step": 2451 }, { "epoch": 0.31, "grad_norm": 0.9761110237084909, "learning_rate": 9.588162470094145e-06, "loss": 0.665, "step": 2452 }, { "epoch": 0.31, "grad_norm": 1.223077998431946, "learning_rate": 9.58775238913532e-06, "loss": 0.6697, "step": 2453 }, { "epoch": 0.31, "grad_norm": 0.5979268632542212, "learning_rate": 9.587342112890539e-06, "loss": 0.5319, "step": 2454 }, { "epoch": 0.31, "grad_norm": 0.7510362212559786, "learning_rate": 9.586931641377262e-06, "loss": 0.6268, "step": 2455 }, { "epoch": 0.31, "grad_norm": 0.7176335642595973, "learning_rate": 9.58652097461296e-06, "loss": 0.5559, "step": 2456 }, { "epoch": 0.31, "grad_norm": 0.6989847542259999, "learning_rate": 9.58611011261512e-06, "loss": 0.5358, "step": 2457 }, { "epoch": 0.31, "grad_norm": 0.8864963162315552, "learning_rate": 9.585699055401226e-06, "loss": 0.6491, "step": 2458 }, { "epoch": 0.31, "grad_norm": 0.6172125182878831, "learning_rate": 9.585287802988778e-06, "loss": 0.5386, "step": 2459 }, { "epoch": 0.31, "grad_norm": 0.7234782866031692, "learning_rate": 9.58487635539528e-06, "loss": 0.5292, "step": 2460 }, { "epoch": 0.31, "grad_norm": 0.6549233588633315, "learning_rate": 9.584464712638245e-06, "loss": 0.5821, "step": 2461 }, { "epoch": 0.31, "grad_norm": 0.7295733671228533, "learning_rate": 9.584052874735201e-06, "loss": 0.6253, "step": 2462 }, { "epoch": 0.31, "grad_norm": 0.5980296759480169, "learning_rate": 9.583640841703672e-06, "loss": 0.4917, "step": 2463 }, { "epoch": 0.31, "grad_norm": 0.6224250812587412, "learning_rate": 9.5832286135612e-06, "loss": 0.5461, "step": 2464 }, { "epoch": 0.31, "grad_norm": 0.6684864407034281, "learning_rate": 9.582816190325333e-06, "loss": 0.5555, "step": 2465 }, { "epoch": 0.31, "grad_norm": 0.6233339430704379, "learning_rate": 9.582403572013623e-06, "loss": 0.5446, "step": 2466 }, { "epoch": 0.31, "grad_norm": 0.7842371818556565, "learning_rate": 9.58199075864364e-06, "loss": 0.6205, "step": 2467 }, { "epoch": 0.31, "grad_norm": 0.7908366264740696, "learning_rate": 9.581577750232948e-06, "loss": 0.5679, "step": 2468 }, { "epoch": 0.31, "grad_norm": 0.7666704393633019, "learning_rate": 9.581164546799135e-06, "loss": 0.6375, "step": 2469 }, { "epoch": 0.31, "grad_norm": 0.5828591565212466, "learning_rate": 9.580751148359785e-06, "loss": 0.5457, "step": 2470 }, { "epoch": 0.31, "grad_norm": 0.6463208179192446, "learning_rate": 9.580337554932497e-06, "loss": 0.6051, "step": 2471 }, { "epoch": 0.31, "grad_norm": 0.6733956994877681, "learning_rate": 9.579923766534875e-06, "loss": 0.5557, "step": 2472 }, { "epoch": 0.32, "grad_norm": 0.7015158626325191, "learning_rate": 9.579509783184535e-06, "loss": 0.6169, "step": 2473 }, { "epoch": 0.32, "grad_norm": 0.5563294339181992, "learning_rate": 9.579095604899097e-06, "loss": 0.5075, "step": 2474 }, { "epoch": 0.32, "grad_norm": 0.9959362943126006, "learning_rate": 9.578681231696191e-06, "loss": 0.6288, "step": 2475 }, { "epoch": 0.32, "grad_norm": 0.7535498086676388, "learning_rate": 9.578266663593458e-06, "loss": 0.7042, "step": 2476 }, { "epoch": 0.32, "grad_norm": 0.825513303560254, "learning_rate": 9.577851900608541e-06, "loss": 0.6261, "step": 2477 }, { "epoch": 0.32, "grad_norm": 0.7635947749024156, "learning_rate": 9.5774369427591e-06, "loss": 0.5794, "step": 2478 }, { "epoch": 0.32, "grad_norm": 0.5658271771333826, "learning_rate": 9.577021790062794e-06, "loss": 0.4903, "step": 2479 }, { "epoch": 0.32, "grad_norm": 0.7353477955814989, "learning_rate": 9.576606442537297e-06, "loss": 0.5989, "step": 2480 }, { "epoch": 0.32, "grad_norm": 0.7766002116136786, "learning_rate": 9.576190900200288e-06, "loss": 0.6137, "step": 2481 }, { "epoch": 0.32, "grad_norm": 0.889908014101899, "learning_rate": 9.575775163069456e-06, "loss": 0.6999, "step": 2482 }, { "epoch": 0.32, "grad_norm": 0.9757343338918586, "learning_rate": 9.575359231162497e-06, "loss": 0.5649, "step": 2483 }, { "epoch": 0.32, "grad_norm": 0.7849444011005536, "learning_rate": 9.574943104497118e-06, "loss": 0.6195, "step": 2484 }, { "epoch": 0.32, "grad_norm": 0.6463259656534145, "learning_rate": 9.574526783091029e-06, "loss": 0.5361, "step": 2485 }, { "epoch": 0.32, "grad_norm": 0.6617501805839066, "learning_rate": 9.574110266961953e-06, "loss": 0.5729, "step": 2486 }, { "epoch": 0.32, "grad_norm": 0.7279993137004347, "learning_rate": 9.573693556127618e-06, "loss": 0.5579, "step": 2487 }, { "epoch": 0.32, "grad_norm": 0.7996822044827662, "learning_rate": 9.573276650605768e-06, "loss": 0.6384, "step": 2488 }, { "epoch": 0.32, "grad_norm": 0.5908133831685772, "learning_rate": 9.572859550414143e-06, "loss": 0.5495, "step": 2489 }, { "epoch": 0.32, "grad_norm": 0.7043086949520345, "learning_rate": 9.572442255570498e-06, "loss": 0.5855, "step": 2490 }, { "epoch": 0.32, "grad_norm": 0.8993169692043346, "learning_rate": 9.5720247660926e-06, "loss": 0.536, "step": 2491 }, { "epoch": 0.32, "grad_norm": 0.7928167705587933, "learning_rate": 9.571607081998216e-06, "loss": 0.5852, "step": 2492 }, { "epoch": 0.32, "grad_norm": 0.7593312595814335, "learning_rate": 9.571189203305128e-06, "loss": 0.6027, "step": 2493 }, { "epoch": 0.32, "grad_norm": 0.6564232624986837, "learning_rate": 9.570771130031123e-06, "loss": 0.6041, "step": 2494 }, { "epoch": 0.32, "grad_norm": 0.6221530793613381, "learning_rate": 9.570352862193998e-06, "loss": 0.5808, "step": 2495 }, { "epoch": 0.32, "grad_norm": 0.7136315212099565, "learning_rate": 9.569934399811556e-06, "loss": 0.5321, "step": 2496 }, { "epoch": 0.32, "grad_norm": 0.7963988455937888, "learning_rate": 9.56951574290161e-06, "loss": 0.6246, "step": 2497 }, { "epoch": 0.32, "grad_norm": 1.014207828651746, "learning_rate": 9.569096891481979e-06, "loss": 0.624, "step": 2498 }, { "epoch": 0.32, "grad_norm": 0.5695149473103667, "learning_rate": 9.568677845570497e-06, "loss": 0.5503, "step": 2499 }, { "epoch": 0.32, "grad_norm": 0.5961049589529291, "learning_rate": 9.568258605184996e-06, "loss": 0.4863, "step": 2500 }, { "epoch": 0.32, "grad_norm": 0.6857720938128696, "learning_rate": 9.567839170343327e-06, "loss": 0.5623, "step": 2501 }, { "epoch": 0.32, "grad_norm": 0.8282032857138397, "learning_rate": 9.567419541063338e-06, "loss": 0.5572, "step": 2502 }, { "epoch": 0.32, "grad_norm": 0.6893026749200307, "learning_rate": 9.566999717362897e-06, "loss": 0.5443, "step": 2503 }, { "epoch": 0.32, "grad_norm": 0.7768239746749537, "learning_rate": 9.56657969925987e-06, "loss": 0.5686, "step": 2504 }, { "epoch": 0.32, "grad_norm": 0.5597594429832988, "learning_rate": 9.56615948677214e-06, "loss": 0.5284, "step": 2505 }, { "epoch": 0.32, "grad_norm": 0.7564168137120164, "learning_rate": 9.565739079917591e-06, "loss": 0.5774, "step": 2506 }, { "epoch": 0.32, "grad_norm": 0.7641672679370525, "learning_rate": 9.56531847871412e-06, "loss": 0.6491, "step": 2507 }, { "epoch": 0.32, "grad_norm": 0.9111016128120439, "learning_rate": 9.56489768317963e-06, "loss": 0.6339, "step": 2508 }, { "epoch": 0.32, "grad_norm": 0.8567261134162234, "learning_rate": 9.564476693332032e-06, "loss": 0.6073, "step": 2509 }, { "epoch": 0.32, "grad_norm": 0.8562793246556688, "learning_rate": 9.564055509189247e-06, "loss": 0.6335, "step": 2510 }, { "epoch": 0.32, "grad_norm": 0.6186671603918202, "learning_rate": 9.563634130769206e-06, "loss": 0.5171, "step": 2511 }, { "epoch": 0.32, "grad_norm": 0.639540957253105, "learning_rate": 9.56321255808984e-06, "loss": 0.6051, "step": 2512 }, { "epoch": 0.32, "grad_norm": 0.6801078881261133, "learning_rate": 9.5627907911691e-06, "loss": 0.5157, "step": 2513 }, { "epoch": 0.32, "grad_norm": 0.6373131560688663, "learning_rate": 9.562368830024935e-06, "loss": 0.4953, "step": 2514 }, { "epoch": 0.32, "grad_norm": 0.7733377559227137, "learning_rate": 9.561946674675308e-06, "loss": 0.6336, "step": 2515 }, { "epoch": 0.32, "grad_norm": 0.8001748975946251, "learning_rate": 9.561524325138192e-06, "loss": 0.657, "step": 2516 }, { "epoch": 0.32, "grad_norm": 0.8574515862304499, "learning_rate": 9.561101781431558e-06, "loss": 0.6496, "step": 2517 }, { "epoch": 0.32, "grad_norm": 0.8432913593456502, "learning_rate": 9.5606790435734e-06, "loss": 0.6368, "step": 2518 }, { "epoch": 0.32, "grad_norm": 0.7313829825713054, "learning_rate": 9.560256111581705e-06, "loss": 0.604, "step": 2519 }, { "epoch": 0.32, "grad_norm": 0.6259317550178952, "learning_rate": 9.559832985474482e-06, "loss": 0.594, "step": 2520 }, { "epoch": 0.32, "grad_norm": 0.6732396527372642, "learning_rate": 9.559409665269741e-06, "loss": 0.5467, "step": 2521 }, { "epoch": 0.32, "grad_norm": 0.9823587856496049, "learning_rate": 9.5589861509855e-06, "loss": 0.636, "step": 2522 }, { "epoch": 0.32, "grad_norm": 0.6743790179134761, "learning_rate": 9.558562442639785e-06, "loss": 0.5974, "step": 2523 }, { "epoch": 0.32, "grad_norm": 0.8780746135749471, "learning_rate": 9.558138540250636e-06, "loss": 0.6705, "step": 2524 }, { "epoch": 0.32, "grad_norm": 0.8130945511228779, "learning_rate": 9.557714443836093e-06, "loss": 0.6641, "step": 2525 }, { "epoch": 0.32, "grad_norm": 0.7906361142613465, "learning_rate": 9.557290153414211e-06, "loss": 0.636, "step": 2526 }, { "epoch": 0.32, "grad_norm": 0.7101826950871989, "learning_rate": 9.556865669003053e-06, "loss": 0.5826, "step": 2527 }, { "epoch": 0.32, "grad_norm": 0.6076279506595039, "learning_rate": 9.556440990620682e-06, "loss": 0.5123, "step": 2528 }, { "epoch": 0.32, "grad_norm": 0.6120283623210143, "learning_rate": 9.556016118285178e-06, "loss": 0.5538, "step": 2529 }, { "epoch": 0.32, "grad_norm": 0.7650111755183286, "learning_rate": 9.555591052014629e-06, "loss": 0.5438, "step": 2530 }, { "epoch": 0.32, "grad_norm": 0.5349207680266397, "learning_rate": 9.555165791827125e-06, "loss": 0.5127, "step": 2531 }, { "epoch": 0.32, "grad_norm": 0.7083003864694378, "learning_rate": 9.55474033774077e-06, "loss": 0.5487, "step": 2532 }, { "epoch": 0.32, "grad_norm": 1.079547641280141, "learning_rate": 9.554314689773674e-06, "loss": 0.6343, "step": 2533 }, { "epoch": 0.32, "grad_norm": 0.6465127203315632, "learning_rate": 9.553888847943956e-06, "loss": 0.5409, "step": 2534 }, { "epoch": 0.32, "grad_norm": 0.6650303012598022, "learning_rate": 9.55346281226974e-06, "loss": 0.5893, "step": 2535 }, { "epoch": 0.32, "grad_norm": 0.6110300011948008, "learning_rate": 9.553036582769164e-06, "loss": 0.4981, "step": 2536 }, { "epoch": 0.32, "grad_norm": 0.8100895984954982, "learning_rate": 9.552610159460369e-06, "loss": 0.6186, "step": 2537 }, { "epoch": 0.32, "grad_norm": 0.8179220677587817, "learning_rate": 9.552183542361508e-06, "loss": 0.5529, "step": 2538 }, { "epoch": 0.32, "grad_norm": 0.5952520994167613, "learning_rate": 9.551756731490742e-06, "loss": 0.522, "step": 2539 }, { "epoch": 0.32, "grad_norm": 0.8226784584113425, "learning_rate": 9.551329726866235e-06, "loss": 0.5815, "step": 2540 }, { "epoch": 0.32, "grad_norm": 0.5704498044176846, "learning_rate": 9.550902528506168e-06, "loss": 0.4416, "step": 2541 }, { "epoch": 0.32, "grad_norm": 0.604239828616303, "learning_rate": 9.550475136428721e-06, "loss": 0.5282, "step": 2542 }, { "epoch": 0.32, "grad_norm": 0.649238403244341, "learning_rate": 9.550047550652089e-06, "loss": 0.5562, "step": 2543 }, { "epoch": 0.32, "grad_norm": 0.6570463366363107, "learning_rate": 9.549619771194472e-06, "loss": 0.5762, "step": 2544 }, { "epoch": 0.32, "grad_norm": 0.6351487994969179, "learning_rate": 9.54919179807408e-06, "loss": 0.5349, "step": 2545 }, { "epoch": 0.32, "grad_norm": 0.648900949394083, "learning_rate": 9.548763631309132e-06, "loss": 0.5566, "step": 2546 }, { "epoch": 0.32, "grad_norm": 0.5933008133983052, "learning_rate": 9.54833527091785e-06, "loss": 0.5216, "step": 2547 }, { "epoch": 0.32, "grad_norm": 0.5900970115260598, "learning_rate": 9.547906716918472e-06, "loss": 0.4871, "step": 2548 }, { "epoch": 0.32, "grad_norm": 0.6477875891595827, "learning_rate": 9.547477969329235e-06, "loss": 0.5577, "step": 2549 }, { "epoch": 0.32, "grad_norm": 0.5294932860860582, "learning_rate": 9.547049028168395e-06, "loss": 0.4855, "step": 2550 }, { "epoch": 0.32, "grad_norm": 0.7605284892461301, "learning_rate": 9.546619893454208e-06, "loss": 0.5526, "step": 2551 }, { "epoch": 0.33, "grad_norm": 0.8821681184089893, "learning_rate": 9.546190565204941e-06, "loss": 0.6431, "step": 2552 }, { "epoch": 0.33, "grad_norm": 0.5972036879598572, "learning_rate": 9.545761043438868e-06, "loss": 0.5477, "step": 2553 }, { "epoch": 0.33, "grad_norm": 0.7774109891516569, "learning_rate": 9.545331328174274e-06, "loss": 0.6452, "step": 2554 }, { "epoch": 0.33, "grad_norm": 0.559993123168119, "learning_rate": 9.544901419429452e-06, "loss": 0.4755, "step": 2555 }, { "epoch": 0.33, "grad_norm": 0.9812318418974344, "learning_rate": 9.544471317222696e-06, "loss": 0.631, "step": 2556 }, { "epoch": 0.33, "grad_norm": 0.5700560989878682, "learning_rate": 9.54404102157232e-06, "loss": 0.5152, "step": 2557 }, { "epoch": 0.33, "grad_norm": 0.6060204128545266, "learning_rate": 9.54361053249664e-06, "loss": 0.48, "step": 2558 }, { "epoch": 0.33, "grad_norm": 0.682067997060381, "learning_rate": 9.543179850013978e-06, "loss": 0.5346, "step": 2559 }, { "epoch": 0.33, "grad_norm": 1.1084238206703774, "learning_rate": 9.542748974142668e-06, "loss": 0.6243, "step": 2560 }, { "epoch": 0.33, "grad_norm": 0.8378037363051217, "learning_rate": 9.542317904901049e-06, "loss": 0.5972, "step": 2561 }, { "epoch": 0.33, "grad_norm": 0.7135604533169227, "learning_rate": 9.541886642307473e-06, "loss": 0.6139, "step": 2562 }, { "epoch": 0.33, "grad_norm": 0.6945523477457736, "learning_rate": 9.541455186380297e-06, "loss": 0.556, "step": 2563 }, { "epoch": 0.33, "grad_norm": 0.8105865099203851, "learning_rate": 9.541023537137885e-06, "loss": 0.662, "step": 2564 }, { "epoch": 0.33, "grad_norm": 0.9723262911446988, "learning_rate": 9.540591694598615e-06, "loss": 0.6237, "step": 2565 }, { "epoch": 0.33, "grad_norm": 0.6284942090793638, "learning_rate": 9.540159658780862e-06, "loss": 0.5601, "step": 2566 }, { "epoch": 0.33, "grad_norm": 0.7943621277394831, "learning_rate": 9.539727429703024e-06, "loss": 0.6296, "step": 2567 }, { "epoch": 0.33, "grad_norm": 0.646599463677551, "learning_rate": 9.539295007383496e-06, "loss": 0.5127, "step": 2568 }, { "epoch": 0.33, "grad_norm": 0.714891775660591, "learning_rate": 9.538862391840683e-06, "loss": 0.5886, "step": 2569 }, { "epoch": 0.33, "grad_norm": 0.7683683299777343, "learning_rate": 9.538429583093003e-06, "loss": 0.6505, "step": 2570 }, { "epoch": 0.33, "grad_norm": 0.8068531708286238, "learning_rate": 9.537996581158878e-06, "loss": 0.5425, "step": 2571 }, { "epoch": 0.33, "grad_norm": 0.7633809149010669, "learning_rate": 9.53756338605674e-06, "loss": 0.5695, "step": 2572 }, { "epoch": 0.33, "grad_norm": 0.5927862910594637, "learning_rate": 9.53712999780503e-06, "loss": 0.5093, "step": 2573 }, { "epoch": 0.33, "grad_norm": 0.6329786480367147, "learning_rate": 9.536696416422194e-06, "loss": 0.533, "step": 2574 }, { "epoch": 0.33, "grad_norm": 0.7403403535131283, "learning_rate": 9.536262641926689e-06, "loss": 0.6452, "step": 2575 }, { "epoch": 0.33, "grad_norm": 0.5918314936786386, "learning_rate": 9.535828674336976e-06, "loss": 0.4818, "step": 2576 }, { "epoch": 0.33, "grad_norm": 0.5683143465029358, "learning_rate": 9.535394513671534e-06, "loss": 0.4978, "step": 2577 }, { "epoch": 0.33, "grad_norm": 1.233205772739789, "learning_rate": 9.534960159948841e-06, "loss": 0.6469, "step": 2578 }, { "epoch": 0.33, "grad_norm": 0.9695389770572457, "learning_rate": 9.534525613187383e-06, "loss": 0.6816, "step": 2579 }, { "epoch": 0.33, "grad_norm": 0.5381105391785254, "learning_rate": 9.534090873405662e-06, "loss": 0.511, "step": 2580 }, { "epoch": 0.33, "grad_norm": 1.190502374810711, "learning_rate": 9.53365594062218e-06, "loss": 0.5892, "step": 2581 }, { "epoch": 0.33, "grad_norm": 0.8059850644306538, "learning_rate": 9.533220814855453e-06, "loss": 0.6301, "step": 2582 }, { "epoch": 0.33, "grad_norm": 0.6498408537331678, "learning_rate": 9.532785496124003e-06, "loss": 0.5565, "step": 2583 }, { "epoch": 0.33, "grad_norm": 0.5783800531387244, "learning_rate": 9.532349984446358e-06, "loss": 0.5364, "step": 2584 }, { "epoch": 0.33, "grad_norm": 0.9546698067137003, "learning_rate": 9.531914279841057e-06, "loss": 0.5862, "step": 2585 }, { "epoch": 0.33, "grad_norm": 0.5701886528114443, "learning_rate": 9.531478382326647e-06, "loss": 0.5115, "step": 2586 }, { "epoch": 0.33, "grad_norm": 0.6303662784078251, "learning_rate": 9.531042291921685e-06, "loss": 0.5237, "step": 2587 }, { "epoch": 0.33, "grad_norm": 0.6637468226996036, "learning_rate": 9.53060600864473e-06, "loss": 0.4938, "step": 2588 }, { "epoch": 0.33, "grad_norm": 0.7976804502105883, "learning_rate": 9.530169532514355e-06, "loss": 0.4904, "step": 2589 }, { "epoch": 0.33, "grad_norm": 1.8480503149393777, "learning_rate": 9.52973286354914e-06, "loss": 0.6147, "step": 2590 }, { "epoch": 0.33, "grad_norm": 0.5410025719543855, "learning_rate": 9.529296001767669e-06, "loss": 0.4789, "step": 2591 }, { "epoch": 0.33, "grad_norm": 0.6944545405994262, "learning_rate": 9.528858947188543e-06, "loss": 0.5154, "step": 2592 }, { "epoch": 0.33, "grad_norm": 0.5943387449210243, "learning_rate": 9.528421699830365e-06, "loss": 0.5479, "step": 2593 }, { "epoch": 0.33, "grad_norm": 0.7460414739115181, "learning_rate": 9.527984259711744e-06, "loss": 0.5879, "step": 2594 }, { "epoch": 0.33, "grad_norm": 0.6829949307460953, "learning_rate": 9.527546626851306e-06, "loss": 0.612, "step": 2595 }, { "epoch": 0.33, "grad_norm": 0.7688717836898237, "learning_rate": 9.527108801267673e-06, "loss": 0.5747, "step": 2596 }, { "epoch": 0.33, "grad_norm": 0.6935414862682998, "learning_rate": 9.526670782979483e-06, "loss": 0.5645, "step": 2597 }, { "epoch": 0.33, "grad_norm": 0.91662531566737, "learning_rate": 9.526232572005387e-06, "loss": 0.6257, "step": 2598 }, { "epoch": 0.33, "grad_norm": 0.6404437087562576, "learning_rate": 9.525794168364032e-06, "loss": 0.5792, "step": 2599 }, { "epoch": 0.33, "grad_norm": 0.9336752361852743, "learning_rate": 9.525355572074083e-06, "loss": 0.6525, "step": 2600 }, { "epoch": 0.33, "grad_norm": 0.6188451823758374, "learning_rate": 9.524916783154206e-06, "loss": 0.5327, "step": 2601 }, { "epoch": 0.33, "grad_norm": 0.595947713758538, "learning_rate": 9.524477801623083e-06, "loss": 0.5018, "step": 2602 }, { "epoch": 0.33, "grad_norm": 0.7602569644562919, "learning_rate": 9.524038627499396e-06, "loss": 0.5526, "step": 2603 }, { "epoch": 0.33, "grad_norm": 1.1390855016538206, "learning_rate": 9.523599260801845e-06, "loss": 0.6233, "step": 2604 }, { "epoch": 0.33, "grad_norm": 0.5826783634067593, "learning_rate": 9.523159701549126e-06, "loss": 0.4934, "step": 2605 }, { "epoch": 0.33, "grad_norm": 0.6096115493108369, "learning_rate": 9.522719949759953e-06, "loss": 0.5076, "step": 2606 }, { "epoch": 0.33, "grad_norm": 0.7390090601415761, "learning_rate": 9.522280005453045e-06, "loss": 0.6452, "step": 2607 }, { "epoch": 0.33, "grad_norm": 0.6902664668253552, "learning_rate": 9.521839868647128e-06, "loss": 0.585, "step": 2608 }, { "epoch": 0.33, "grad_norm": 0.8791964525039503, "learning_rate": 9.521399539360937e-06, "loss": 0.6836, "step": 2609 }, { "epoch": 0.33, "grad_norm": 0.6490246657183921, "learning_rate": 9.520959017613215e-06, "loss": 0.5153, "step": 2610 }, { "epoch": 0.33, "grad_norm": 0.5809035468751219, "learning_rate": 9.520518303422718e-06, "loss": 0.5684, "step": 2611 }, { "epoch": 0.33, "grad_norm": 0.6707458810382166, "learning_rate": 9.5200773968082e-06, "loss": 0.5169, "step": 2612 }, { "epoch": 0.33, "grad_norm": 0.6345208821443088, "learning_rate": 9.51963629778843e-06, "loss": 0.5915, "step": 2613 }, { "epoch": 0.33, "grad_norm": 0.530845563877577, "learning_rate": 9.519195006382188e-06, "loss": 0.517, "step": 2614 }, { "epoch": 0.33, "grad_norm": 0.835407233106553, "learning_rate": 9.518753522608255e-06, "loss": 0.6703, "step": 2615 }, { "epoch": 0.33, "grad_norm": 0.6793471378124661, "learning_rate": 9.518311846485424e-06, "loss": 0.5751, "step": 2616 }, { "epoch": 0.33, "grad_norm": 0.7349933974032022, "learning_rate": 9.517869978032497e-06, "loss": 0.5582, "step": 2617 }, { "epoch": 0.33, "grad_norm": 1.074984731694823, "learning_rate": 9.51742791726828e-06, "loss": 0.6236, "step": 2618 }, { "epoch": 0.33, "grad_norm": 0.9201587831352932, "learning_rate": 9.516985664211595e-06, "loss": 0.5787, "step": 2619 }, { "epoch": 0.33, "grad_norm": 0.6485537188339017, "learning_rate": 9.516543218881264e-06, "loss": 0.5675, "step": 2620 }, { "epoch": 0.33, "grad_norm": 0.7191479145391688, "learning_rate": 9.51610058129612e-06, "loss": 0.5694, "step": 2621 }, { "epoch": 0.33, "grad_norm": 0.8073994848174533, "learning_rate": 9.515657751475005e-06, "loss": 0.653, "step": 2622 }, { "epoch": 0.33, "grad_norm": 0.620470519151046, "learning_rate": 9.51521472943677e-06, "loss": 0.5594, "step": 2623 }, { "epoch": 0.33, "grad_norm": 0.8577981010508564, "learning_rate": 9.514771515200273e-06, "loss": 0.5185, "step": 2624 }, { "epoch": 0.33, "grad_norm": 0.5613299806463133, "learning_rate": 9.51432810878438e-06, "loss": 0.5347, "step": 2625 }, { "epoch": 0.33, "grad_norm": 0.6905584167856317, "learning_rate": 9.513884510207965e-06, "loss": 0.5404, "step": 2626 }, { "epoch": 0.33, "grad_norm": 0.5787359917483325, "learning_rate": 9.51344071948991e-06, "loss": 0.5663, "step": 2627 }, { "epoch": 0.33, "grad_norm": 0.6451525074311069, "learning_rate": 9.512996736649107e-06, "loss": 0.5533, "step": 2628 }, { "epoch": 0.33, "grad_norm": 0.6246696688029569, "learning_rate": 9.512552561704454e-06, "loss": 0.5496, "step": 2629 }, { "epoch": 0.34, "grad_norm": 0.6815441549936629, "learning_rate": 9.51210819467486e-06, "loss": 0.5343, "step": 2630 }, { "epoch": 0.34, "grad_norm": 0.8360184672663747, "learning_rate": 9.511663635579237e-06, "loss": 0.657, "step": 2631 }, { "epoch": 0.34, "grad_norm": 0.7001008146479152, "learning_rate": 9.51121888443651e-06, "loss": 0.6144, "step": 2632 }, { "epoch": 0.34, "grad_norm": 0.832107455918356, "learning_rate": 9.510773941265612e-06, "loss": 0.5801, "step": 2633 }, { "epoch": 0.34, "grad_norm": 0.6515346390702125, "learning_rate": 9.510328806085483e-06, "loss": 0.536, "step": 2634 }, { "epoch": 0.34, "grad_norm": 0.5205168551152277, "learning_rate": 9.509883478915068e-06, "loss": 0.4978, "step": 2635 }, { "epoch": 0.34, "grad_norm": 0.8758102289762094, "learning_rate": 9.509437959773324e-06, "loss": 0.6378, "step": 2636 }, { "epoch": 0.34, "grad_norm": 0.7007231004619369, "learning_rate": 9.508992248679217e-06, "loss": 0.5801, "step": 2637 }, { "epoch": 0.34, "grad_norm": 2.6204772699830503, "learning_rate": 9.50854634565172e-06, "loss": 0.6512, "step": 2638 }, { "epoch": 0.34, "grad_norm": 0.969055388010146, "learning_rate": 9.508100250709809e-06, "loss": 0.6135, "step": 2639 }, { "epoch": 0.34, "grad_norm": 1.1653372937097857, "learning_rate": 9.507653963872479e-06, "loss": 0.5752, "step": 2640 }, { "epoch": 0.34, "grad_norm": 0.6896592730666222, "learning_rate": 9.507207485158724e-06, "loss": 0.5448, "step": 2641 }, { "epoch": 0.34, "grad_norm": 1.0668974797658888, "learning_rate": 9.506760814587547e-06, "loss": 0.6443, "step": 2642 }, { "epoch": 0.34, "grad_norm": 0.8925707789010904, "learning_rate": 9.506313952177966e-06, "loss": 0.6913, "step": 2643 }, { "epoch": 0.34, "grad_norm": 0.7922544781589685, "learning_rate": 9.505866897949e-06, "loss": 0.5695, "step": 2644 }, { "epoch": 0.34, "grad_norm": 0.5977798601691642, "learning_rate": 9.505419651919678e-06, "loss": 0.5472, "step": 2645 }, { "epoch": 0.34, "grad_norm": 0.5857657542808271, "learning_rate": 9.50497221410904e-06, "loss": 0.5256, "step": 2646 }, { "epoch": 0.34, "grad_norm": 0.6269816345974752, "learning_rate": 9.50452458453613e-06, "loss": 0.5469, "step": 2647 }, { "epoch": 0.34, "grad_norm": 0.6637418482259861, "learning_rate": 9.504076763220003e-06, "loss": 0.5697, "step": 2648 }, { "epoch": 0.34, "grad_norm": 0.7637371397015333, "learning_rate": 9.503628750179719e-06, "loss": 0.568, "step": 2649 }, { "epoch": 0.34, "grad_norm": 0.939164956586822, "learning_rate": 9.503180545434353e-06, "loss": 0.6734, "step": 2650 }, { "epoch": 0.34, "grad_norm": 0.8160952788765521, "learning_rate": 9.50273214900298e-06, "loss": 0.589, "step": 2651 }, { "epoch": 0.34, "grad_norm": 0.664853124608123, "learning_rate": 9.50228356090469e-06, "loss": 0.5112, "step": 2652 }, { "epoch": 0.34, "grad_norm": 0.6596469254493981, "learning_rate": 9.501834781158574e-06, "loss": 0.5901, "step": 2653 }, { "epoch": 0.34, "grad_norm": 0.6962722835328786, "learning_rate": 9.501385809783739e-06, "loss": 0.5398, "step": 2654 }, { "epoch": 0.34, "grad_norm": 0.6632304354029682, "learning_rate": 9.500936646799293e-06, "loss": 0.5534, "step": 2655 }, { "epoch": 0.34, "grad_norm": 0.847071596337105, "learning_rate": 9.500487292224357e-06, "loss": 0.6602, "step": 2656 }, { "epoch": 0.34, "grad_norm": 0.6594231855021783, "learning_rate": 9.500037746078058e-06, "loss": 0.5528, "step": 2657 }, { "epoch": 0.34, "grad_norm": 0.6536904250392873, "learning_rate": 9.499588008379534e-06, "loss": 0.5198, "step": 2658 }, { "epoch": 0.34, "grad_norm": 0.6360593846325148, "learning_rate": 9.499138079147927e-06, "loss": 0.592, "step": 2659 }, { "epoch": 0.34, "grad_norm": 0.6326183225471013, "learning_rate": 9.498687958402388e-06, "loss": 0.5683, "step": 2660 }, { "epoch": 0.34, "grad_norm": 0.6737489838915545, "learning_rate": 9.498237646162078e-06, "loss": 0.5475, "step": 2661 }, { "epoch": 0.34, "grad_norm": 0.6694602163920368, "learning_rate": 9.497787142446166e-06, "loss": 0.5977, "step": 2662 }, { "epoch": 0.34, "grad_norm": 0.6919396971952262, "learning_rate": 9.497336447273828e-06, "loss": 0.6119, "step": 2663 }, { "epoch": 0.34, "grad_norm": 0.6545875727828754, "learning_rate": 9.496885560664251e-06, "loss": 0.5487, "step": 2664 }, { "epoch": 0.34, "grad_norm": 0.8161631128077677, "learning_rate": 9.496434482636623e-06, "loss": 0.6141, "step": 2665 }, { "epoch": 0.34, "grad_norm": 1.066318647899695, "learning_rate": 9.49598321321015e-06, "loss": 0.5804, "step": 2666 }, { "epoch": 0.34, "grad_norm": 0.6145505682433737, "learning_rate": 9.495531752404036e-06, "loss": 0.5522, "step": 2667 }, { "epoch": 0.34, "grad_norm": 0.7404581570988357, "learning_rate": 9.495080100237503e-06, "loss": 0.6437, "step": 2668 }, { "epoch": 0.34, "grad_norm": 0.6453979666377999, "learning_rate": 9.494628256729774e-06, "loss": 0.5338, "step": 2669 }, { "epoch": 0.34, "grad_norm": 0.7390857664877688, "learning_rate": 9.494176221900081e-06, "loss": 0.5518, "step": 2670 }, { "epoch": 0.34, "grad_norm": 0.6313898819588964, "learning_rate": 9.49372399576767e-06, "loss": 0.5381, "step": 2671 }, { "epoch": 0.34, "grad_norm": 0.7259062340874922, "learning_rate": 9.493271578351787e-06, "loss": 0.5903, "step": 2672 }, { "epoch": 0.34, "grad_norm": 0.9063405020904501, "learning_rate": 9.492818969671693e-06, "loss": 0.5912, "step": 2673 }, { "epoch": 0.34, "grad_norm": 0.6202276898209159, "learning_rate": 9.492366169746652e-06, "loss": 0.5696, "step": 2674 }, { "epoch": 0.34, "grad_norm": 0.8134286613739936, "learning_rate": 9.491913178595937e-06, "loss": 0.596, "step": 2675 }, { "epoch": 0.34, "grad_norm": 0.8993303631152776, "learning_rate": 9.491459996238834e-06, "loss": 0.641, "step": 2676 }, { "epoch": 0.34, "grad_norm": 0.816233493485614, "learning_rate": 9.49100662269463e-06, "loss": 0.6091, "step": 2677 }, { "epoch": 0.34, "grad_norm": 0.6127999309058696, "learning_rate": 9.490553057982627e-06, "loss": 0.4907, "step": 2678 }, { "epoch": 0.34, "grad_norm": 0.6327981151154719, "learning_rate": 9.490099302122129e-06, "loss": 0.5736, "step": 2679 }, { "epoch": 0.34, "grad_norm": 0.8263889646504642, "learning_rate": 9.489645355132452e-06, "loss": 0.6461, "step": 2680 }, { "epoch": 0.34, "grad_norm": 0.6240105661629423, "learning_rate": 9.489191217032919e-06, "loss": 0.5742, "step": 2681 }, { "epoch": 0.34, "grad_norm": 0.6009437124199746, "learning_rate": 9.488736887842862e-06, "loss": 0.5071, "step": 2682 }, { "epoch": 0.34, "grad_norm": 0.8700586532006659, "learning_rate": 9.488282367581619e-06, "loss": 0.712, "step": 2683 }, { "epoch": 0.34, "grad_norm": 0.7503216585904273, "learning_rate": 9.487827656268539e-06, "loss": 0.5549, "step": 2684 }, { "epoch": 0.34, "grad_norm": 0.860642292830429, "learning_rate": 9.487372753922976e-06, "loss": 0.64, "step": 2685 }, { "epoch": 0.34, "grad_norm": 0.5906724271653871, "learning_rate": 9.486917660564294e-06, "loss": 0.5373, "step": 2686 }, { "epoch": 0.34, "grad_norm": 0.748581513417254, "learning_rate": 9.486462376211866e-06, "loss": 0.5543, "step": 2687 }, { "epoch": 0.34, "grad_norm": 0.6625302637533109, "learning_rate": 9.486006900885073e-06, "loss": 0.5095, "step": 2688 }, { "epoch": 0.34, "grad_norm": 0.6821843085540856, "learning_rate": 9.485551234603299e-06, "loss": 0.5771, "step": 2689 }, { "epoch": 0.34, "grad_norm": 0.8304659259874845, "learning_rate": 9.485095377385943e-06, "loss": 0.5634, "step": 2690 }, { "epoch": 0.34, "grad_norm": 0.7512685960757559, "learning_rate": 9.484639329252409e-06, "loss": 0.6491, "step": 2691 }, { "epoch": 0.34, "grad_norm": 0.7534347072646292, "learning_rate": 9.48418309022211e-06, "loss": 0.6229, "step": 2692 }, { "epoch": 0.34, "grad_norm": 0.6126461797913709, "learning_rate": 9.483726660314464e-06, "loss": 0.5082, "step": 2693 }, { "epoch": 0.34, "grad_norm": 0.7279374182617983, "learning_rate": 9.483270039548906e-06, "loss": 0.5317, "step": 2694 }, { "epoch": 0.34, "grad_norm": 0.9354531491803656, "learning_rate": 9.482813227944868e-06, "loss": 0.6301, "step": 2695 }, { "epoch": 0.34, "grad_norm": 1.2255087844545716, "learning_rate": 9.482356225521794e-06, "loss": 0.6343, "step": 2696 }, { "epoch": 0.34, "grad_norm": 0.6515432382342676, "learning_rate": 9.48189903229914e-06, "loss": 0.608, "step": 2697 }, { "epoch": 0.34, "grad_norm": 0.7259746189084652, "learning_rate": 9.481441648296367e-06, "loss": 0.6028, "step": 2698 }, { "epoch": 0.34, "grad_norm": 0.8509618283107478, "learning_rate": 9.480984073532943e-06, "loss": 0.6006, "step": 2699 }, { "epoch": 0.34, "grad_norm": 0.661686115769553, "learning_rate": 9.480526308028345e-06, "loss": 0.5693, "step": 2700 }, { "epoch": 0.34, "grad_norm": 0.8340500821731865, "learning_rate": 9.480068351802063e-06, "loss": 0.6269, "step": 2701 }, { "epoch": 0.34, "grad_norm": 0.7294376986355238, "learning_rate": 9.479610204873586e-06, "loss": 0.5847, "step": 2702 }, { "epoch": 0.34, "grad_norm": 0.64222982626784, "learning_rate": 9.479151867262417e-06, "loss": 0.6098, "step": 2703 }, { "epoch": 0.34, "grad_norm": 0.7411094065018148, "learning_rate": 9.478693338988065e-06, "loss": 0.5866, "step": 2704 }, { "epoch": 0.34, "grad_norm": 0.7757412492305917, "learning_rate": 9.47823462007005e-06, "loss": 0.6302, "step": 2705 }, { "epoch": 0.34, "grad_norm": 0.7008346031950602, "learning_rate": 9.477775710527898e-06, "loss": 0.6065, "step": 2706 }, { "epoch": 0.34, "grad_norm": 0.6450380535106405, "learning_rate": 9.477316610381144e-06, "loss": 0.5669, "step": 2707 }, { "epoch": 0.34, "grad_norm": 0.8020350688024981, "learning_rate": 9.47685731964933e-06, "loss": 0.5942, "step": 2708 }, { "epoch": 0.35, "grad_norm": 0.6515230738536907, "learning_rate": 9.476397838352003e-06, "loss": 0.537, "step": 2709 }, { "epoch": 0.35, "grad_norm": 0.7106797304009778, "learning_rate": 9.475938166508726e-06, "loss": 0.5968, "step": 2710 }, { "epoch": 0.35, "grad_norm": 0.6767112085036076, "learning_rate": 9.475478304139064e-06, "loss": 0.5551, "step": 2711 }, { "epoch": 0.35, "grad_norm": 0.8966197085509974, "learning_rate": 9.475018251262594e-06, "loss": 0.6464, "step": 2712 }, { "epoch": 0.35, "grad_norm": 0.8575931820709087, "learning_rate": 9.474558007898895e-06, "loss": 0.6516, "step": 2713 }, { "epoch": 0.35, "grad_norm": 0.9080998604636245, "learning_rate": 9.47409757406756e-06, "loss": 0.604, "step": 2714 }, { "epoch": 0.35, "grad_norm": 0.8304634081485033, "learning_rate": 9.47363694978819e-06, "loss": 0.5593, "step": 2715 }, { "epoch": 0.35, "grad_norm": 0.8461238243280341, "learning_rate": 9.473176135080392e-06, "loss": 0.6296, "step": 2716 }, { "epoch": 0.35, "grad_norm": 0.6635571220376301, "learning_rate": 9.472715129963776e-06, "loss": 0.5634, "step": 2717 }, { "epoch": 0.35, "grad_norm": 0.6624474616243881, "learning_rate": 9.472253934457973e-06, "loss": 0.5372, "step": 2718 }, { "epoch": 0.35, "grad_norm": 0.6674988841873706, "learning_rate": 9.47179254858261e-06, "loss": 0.5388, "step": 2719 }, { "epoch": 0.35, "grad_norm": 1.19573730107884, "learning_rate": 9.471330972357327e-06, "loss": 0.6058, "step": 2720 }, { "epoch": 0.35, "grad_norm": 0.5937215067510677, "learning_rate": 9.470869205801774e-06, "loss": 0.566, "step": 2721 }, { "epoch": 0.35, "grad_norm": 0.5487173465229642, "learning_rate": 9.470407248935606e-06, "loss": 0.5182, "step": 2722 }, { "epoch": 0.35, "grad_norm": 0.8008315113990622, "learning_rate": 9.469945101778487e-06, "loss": 0.5512, "step": 2723 }, { "epoch": 0.35, "grad_norm": 0.6749228954537383, "learning_rate": 9.46948276435009e-06, "loss": 0.5427, "step": 2724 }, { "epoch": 0.35, "grad_norm": 0.7251223243488515, "learning_rate": 9.46902023667009e-06, "loss": 0.5931, "step": 2725 }, { "epoch": 0.35, "grad_norm": 0.5893424590220128, "learning_rate": 9.468557518758183e-06, "loss": 0.5271, "step": 2726 }, { "epoch": 0.35, "grad_norm": 0.6206131778648086, "learning_rate": 9.468094610634062e-06, "loss": 0.5336, "step": 2727 }, { "epoch": 0.35, "grad_norm": 0.7232629802065703, "learning_rate": 9.46763151231743e-06, "loss": 0.5801, "step": 2728 }, { "epoch": 0.35, "grad_norm": 0.5559640325030679, "learning_rate": 9.467168223828001e-06, "loss": 0.5295, "step": 2729 }, { "epoch": 0.35, "grad_norm": 0.6453014003908335, "learning_rate": 9.466704745185497e-06, "loss": 0.5208, "step": 2730 }, { "epoch": 0.35, "grad_norm": 0.7949818119663074, "learning_rate": 9.466241076409644e-06, "loss": 0.6, "step": 2731 }, { "epoch": 0.35, "grad_norm": 0.6768070524933107, "learning_rate": 9.46577721752018e-06, "loss": 0.5493, "step": 2732 }, { "epoch": 0.35, "grad_norm": 0.6235733846150524, "learning_rate": 9.465313168536852e-06, "loss": 0.4976, "step": 2733 }, { "epoch": 0.35, "grad_norm": 0.5526313657564148, "learning_rate": 9.464848929479412e-06, "loss": 0.5021, "step": 2734 }, { "epoch": 0.35, "grad_norm": 0.7255657737830097, "learning_rate": 9.46438450036762e-06, "loss": 0.5794, "step": 2735 }, { "epoch": 0.35, "grad_norm": 0.729547906615159, "learning_rate": 9.463919881221246e-06, "loss": 0.631, "step": 2736 }, { "epoch": 0.35, "grad_norm": 0.7358070039119499, "learning_rate": 9.463455072060066e-06, "loss": 0.7151, "step": 2737 }, { "epoch": 0.35, "grad_norm": 0.565924718889971, "learning_rate": 9.462990072903868e-06, "loss": 0.4992, "step": 2738 }, { "epoch": 0.35, "grad_norm": 0.708636311457162, "learning_rate": 9.462524883772444e-06, "loss": 0.5494, "step": 2739 }, { "epoch": 0.35, "grad_norm": 0.7565394219576851, "learning_rate": 9.462059504685596e-06, "loss": 0.6017, "step": 2740 }, { "epoch": 0.35, "grad_norm": 0.6288142162213577, "learning_rate": 9.461593935663134e-06, "loss": 0.5263, "step": 2741 }, { "epoch": 0.35, "grad_norm": 0.628364933812235, "learning_rate": 9.461128176724876e-06, "loss": 0.522, "step": 2742 }, { "epoch": 0.35, "grad_norm": 0.6109788086616127, "learning_rate": 9.460662227890645e-06, "loss": 0.55, "step": 2743 }, { "epoch": 0.35, "grad_norm": 0.710527864029022, "learning_rate": 9.460196089180278e-06, "loss": 0.6023, "step": 2744 }, { "epoch": 0.35, "grad_norm": 0.5793125193996272, "learning_rate": 9.459729760613618e-06, "loss": 0.521, "step": 2745 }, { "epoch": 0.35, "grad_norm": 0.633340780233596, "learning_rate": 9.459263242210511e-06, "loss": 0.5724, "step": 2746 }, { "epoch": 0.35, "grad_norm": 0.9481665252210578, "learning_rate": 9.45879653399082e-06, "loss": 0.6289, "step": 2747 }, { "epoch": 0.35, "grad_norm": 0.7203959862754759, "learning_rate": 9.458329635974405e-06, "loss": 0.6071, "step": 2748 }, { "epoch": 0.35, "grad_norm": 0.7140244824243365, "learning_rate": 9.457862548181147e-06, "loss": 0.5492, "step": 2749 }, { "epoch": 0.35, "grad_norm": 0.6796579025760305, "learning_rate": 9.457395270630926e-06, "loss": 0.5862, "step": 2750 }, { "epoch": 0.35, "grad_norm": 0.6233384318469098, "learning_rate": 9.456927803343631e-06, "loss": 0.6139, "step": 2751 }, { "epoch": 0.35, "grad_norm": 0.5590310560221577, "learning_rate": 9.456460146339162e-06, "loss": 0.5194, "step": 2752 }, { "epoch": 0.35, "grad_norm": 0.6587705455020467, "learning_rate": 9.455992299637426e-06, "loss": 0.5528, "step": 2753 }, { "epoch": 0.35, "grad_norm": 0.6777434539217504, "learning_rate": 9.455524263258335e-06, "loss": 0.5015, "step": 2754 }, { "epoch": 0.35, "grad_norm": 1.218106107899709, "learning_rate": 9.455056037221816e-06, "loss": 0.5961, "step": 2755 }, { "epoch": 0.35, "grad_norm": 0.8525734012370407, "learning_rate": 9.454587621547796e-06, "loss": 0.6683, "step": 2756 }, { "epoch": 0.35, "grad_norm": 0.9978909741872638, "learning_rate": 9.454119016256218e-06, "loss": 0.6548, "step": 2757 }, { "epoch": 0.35, "grad_norm": 0.6537367828334337, "learning_rate": 9.453650221367025e-06, "loss": 0.5757, "step": 2758 }, { "epoch": 0.35, "grad_norm": 0.8057521803778169, "learning_rate": 9.453181236900175e-06, "loss": 0.5573, "step": 2759 }, { "epoch": 0.35, "grad_norm": 0.8751902451285365, "learning_rate": 9.452712062875631e-06, "loss": 0.603, "step": 2760 }, { "epoch": 0.35, "grad_norm": 0.7558656559155569, "learning_rate": 9.452242699313361e-06, "loss": 0.6003, "step": 2761 }, { "epoch": 0.35, "grad_norm": 0.643190086717265, "learning_rate": 9.451773146233348e-06, "loss": 0.5123, "step": 2762 }, { "epoch": 0.35, "grad_norm": 0.7753792676439469, "learning_rate": 9.451303403655579e-06, "loss": 0.6313, "step": 2763 }, { "epoch": 0.35, "grad_norm": 0.7982668840577974, "learning_rate": 9.450833471600047e-06, "loss": 0.6137, "step": 2764 }, { "epoch": 0.35, "grad_norm": 0.6469800456142494, "learning_rate": 9.450363350086756e-06, "loss": 0.5721, "step": 2765 }, { "epoch": 0.35, "grad_norm": 0.5954827834637731, "learning_rate": 9.44989303913572e-06, "loss": 0.5192, "step": 2766 }, { "epoch": 0.35, "grad_norm": 0.6267961389988276, "learning_rate": 9.449422538766958e-06, "loss": 0.5352, "step": 2767 }, { "epoch": 0.35, "grad_norm": 0.6181168997578211, "learning_rate": 9.448951849000494e-06, "loss": 0.5943, "step": 2768 }, { "epoch": 0.35, "grad_norm": 0.9379775552391613, "learning_rate": 9.448480969856368e-06, "loss": 0.6026, "step": 2769 }, { "epoch": 0.35, "grad_norm": 0.8224891932385208, "learning_rate": 9.448009901354623e-06, "loss": 0.6197, "step": 2770 }, { "epoch": 0.35, "grad_norm": 0.6413797970707639, "learning_rate": 9.44753864351531e-06, "loss": 0.5587, "step": 2771 }, { "epoch": 0.35, "grad_norm": 0.9317899250101933, "learning_rate": 9.44706719635849e-06, "loss": 0.6364, "step": 2772 }, { "epoch": 0.35, "grad_norm": 0.6969392482600794, "learning_rate": 9.446595559904228e-06, "loss": 0.5118, "step": 2773 }, { "epoch": 0.35, "grad_norm": 0.7753036998085789, "learning_rate": 9.446123734172606e-06, "loss": 0.5955, "step": 2774 }, { "epoch": 0.35, "grad_norm": 1.1130667261540452, "learning_rate": 9.445651719183701e-06, "loss": 0.7081, "step": 2775 }, { "epoch": 0.35, "grad_norm": 0.7918291910362325, "learning_rate": 9.44517951495761e-06, "loss": 0.624, "step": 2776 }, { "epoch": 0.35, "grad_norm": 0.6756623501720954, "learning_rate": 9.444707121514432e-06, "loss": 0.5837, "step": 2777 }, { "epoch": 0.35, "grad_norm": 0.6287568219254337, "learning_rate": 9.444234538874273e-06, "loss": 0.5435, "step": 2778 }, { "epoch": 0.35, "grad_norm": 0.5790455136469947, "learning_rate": 9.443761767057253e-06, "loss": 0.517, "step": 2779 }, { "epoch": 0.35, "grad_norm": 0.8612660254985647, "learning_rate": 9.443288806083496e-06, "loss": 0.6321, "step": 2780 }, { "epoch": 0.35, "grad_norm": 0.7449823418496511, "learning_rate": 9.442815655973133e-06, "loss": 0.6455, "step": 2781 }, { "epoch": 0.35, "grad_norm": 0.5845316912298215, "learning_rate": 9.442342316746303e-06, "loss": 0.555, "step": 2782 }, { "epoch": 0.35, "grad_norm": 0.9020077596190232, "learning_rate": 9.441868788423156e-06, "loss": 0.6682, "step": 2783 }, { "epoch": 0.35, "grad_norm": 0.6004706591343363, "learning_rate": 9.44139507102385e-06, "loss": 0.5382, "step": 2784 }, { "epoch": 0.35, "grad_norm": 0.7774042995780771, "learning_rate": 9.440921164568548e-06, "loss": 0.6594, "step": 2785 }, { "epoch": 0.35, "grad_norm": 0.7090563768533579, "learning_rate": 9.440447069077425e-06, "loss": 0.5637, "step": 2786 }, { "epoch": 0.36, "grad_norm": 0.6212625374063201, "learning_rate": 9.439972784570659e-06, "loss": 0.5373, "step": 2787 }, { "epoch": 0.36, "grad_norm": 0.9759312113547524, "learning_rate": 9.439498311068438e-06, "loss": 0.6406, "step": 2788 }, { "epoch": 0.36, "grad_norm": 0.7079321917169328, "learning_rate": 9.439023648590961e-06, "loss": 0.5166, "step": 2789 }, { "epoch": 0.36, "grad_norm": 0.6498521179541045, "learning_rate": 9.438548797158435e-06, "loss": 0.532, "step": 2790 }, { "epoch": 0.36, "grad_norm": 0.5737016888241293, "learning_rate": 9.438073756791068e-06, "loss": 0.5157, "step": 2791 }, { "epoch": 0.36, "grad_norm": 0.6755265566833282, "learning_rate": 9.437598527509082e-06, "loss": 0.5655, "step": 2792 }, { "epoch": 0.36, "grad_norm": 0.7078292393750686, "learning_rate": 9.43712310933271e-06, "loss": 0.6258, "step": 2793 }, { "epoch": 0.36, "grad_norm": 0.7853443316626899, "learning_rate": 9.436647502282185e-06, "loss": 0.5843, "step": 2794 }, { "epoch": 0.36, "grad_norm": 0.7130818120694572, "learning_rate": 9.436171706377753e-06, "loss": 0.5606, "step": 2795 }, { "epoch": 0.36, "grad_norm": 0.77522789020357, "learning_rate": 9.435695721639668e-06, "loss": 0.6855, "step": 2796 }, { "epoch": 0.36, "grad_norm": 0.6393191307337324, "learning_rate": 9.43521954808819e-06, "loss": 0.5047, "step": 2797 }, { "epoch": 0.36, "grad_norm": 0.8133557704088161, "learning_rate": 9.43474318574359e-06, "loss": 0.6843, "step": 2798 }, { "epoch": 0.36, "grad_norm": 0.91311500568812, "learning_rate": 9.434266634626143e-06, "loss": 0.6161, "step": 2799 }, { "epoch": 0.36, "grad_norm": 0.6518119783303293, "learning_rate": 9.433789894756136e-06, "loss": 0.5574, "step": 2800 }, { "epoch": 0.36, "grad_norm": 1.2686676296534765, "learning_rate": 9.43331296615386e-06, "loss": 0.644, "step": 2801 }, { "epoch": 0.36, "grad_norm": 0.748624786857783, "learning_rate": 9.432835848839619e-06, "loss": 0.5264, "step": 2802 }, { "epoch": 0.36, "grad_norm": 0.638531267290233, "learning_rate": 9.432358542833722e-06, "loss": 0.5296, "step": 2803 }, { "epoch": 0.36, "grad_norm": 0.6667117779795751, "learning_rate": 9.431881048156484e-06, "loss": 0.5383, "step": 2804 }, { "epoch": 0.36, "grad_norm": 1.1193428143495239, "learning_rate": 9.431403364828233e-06, "loss": 0.5939, "step": 2805 }, { "epoch": 0.36, "grad_norm": 0.919471429477277, "learning_rate": 9.4309254928693e-06, "loss": 0.622, "step": 2806 }, { "epoch": 0.36, "grad_norm": 0.817986507762727, "learning_rate": 9.43044743230003e-06, "loss": 0.6453, "step": 2807 }, { "epoch": 0.36, "grad_norm": 0.7075351440730898, "learning_rate": 9.429969183140771e-06, "loss": 0.5307, "step": 2808 }, { "epoch": 0.36, "grad_norm": 0.8892681978788348, "learning_rate": 9.429490745411878e-06, "loss": 0.5703, "step": 2809 }, { "epoch": 0.36, "grad_norm": 0.6934844745237143, "learning_rate": 9.42901211913372e-06, "loss": 0.5847, "step": 2810 }, { "epoch": 0.36, "grad_norm": 0.7907377083152881, "learning_rate": 9.428533304326668e-06, "loss": 0.5943, "step": 2811 }, { "epoch": 0.36, "grad_norm": 0.7895661295094039, "learning_rate": 9.428054301011104e-06, "loss": 0.6229, "step": 2812 }, { "epoch": 0.36, "grad_norm": 0.592793376722089, "learning_rate": 9.42757510920742e-06, "loss": 0.5125, "step": 2813 }, { "epoch": 0.36, "grad_norm": 0.9090321305209546, "learning_rate": 9.427095728936013e-06, "loss": 0.6113, "step": 2814 }, { "epoch": 0.36, "grad_norm": 0.6829019996273853, "learning_rate": 9.426616160217288e-06, "loss": 0.5814, "step": 2815 }, { "epoch": 0.36, "grad_norm": 0.6471304331268625, "learning_rate": 9.426136403071656e-06, "loss": 0.5511, "step": 2816 }, { "epoch": 0.36, "grad_norm": 0.6523135127801731, "learning_rate": 9.425656457519544e-06, "loss": 0.5825, "step": 2817 }, { "epoch": 0.36, "grad_norm": 1.159718982308664, "learning_rate": 9.425176323581375e-06, "loss": 0.6203, "step": 2818 }, { "epoch": 0.36, "grad_norm": 0.8029568291454016, "learning_rate": 9.424696001277594e-06, "loss": 0.5665, "step": 2819 }, { "epoch": 0.36, "grad_norm": 1.0670005911598865, "learning_rate": 9.424215490628644e-06, "loss": 0.6193, "step": 2820 }, { "epoch": 0.36, "grad_norm": 0.6666859487893871, "learning_rate": 9.423734791654976e-06, "loss": 0.5869, "step": 2821 }, { "epoch": 0.36, "grad_norm": 0.7380117096550212, "learning_rate": 9.423253904377054e-06, "loss": 0.5548, "step": 2822 }, { "epoch": 0.36, "grad_norm": 0.86011089010381, "learning_rate": 9.42277282881535e-06, "loss": 0.5972, "step": 2823 }, { "epoch": 0.36, "grad_norm": 0.8826088904629107, "learning_rate": 9.422291564990339e-06, "loss": 0.6252, "step": 2824 }, { "epoch": 0.36, "grad_norm": 0.8408935153832707, "learning_rate": 9.421810112922507e-06, "loss": 0.6092, "step": 2825 }, { "epoch": 0.36, "grad_norm": 0.8537020795261052, "learning_rate": 9.421328472632349e-06, "loss": 0.68, "step": 2826 }, { "epoch": 0.36, "grad_norm": 0.6949599215545131, "learning_rate": 9.420846644140368e-06, "loss": 0.5896, "step": 2827 }, { "epoch": 0.36, "grad_norm": 0.7244250735563492, "learning_rate": 9.420364627467071e-06, "loss": 0.6225, "step": 2828 }, { "epoch": 0.36, "grad_norm": 0.7446010533844636, "learning_rate": 9.419882422632978e-06, "loss": 0.588, "step": 2829 }, { "epoch": 0.36, "grad_norm": 0.7872628454553385, "learning_rate": 9.419400029658613e-06, "loss": 0.5979, "step": 2830 }, { "epoch": 0.36, "grad_norm": 0.6342756540961432, "learning_rate": 9.418917448564512e-06, "loss": 0.5236, "step": 2831 }, { "epoch": 0.36, "grad_norm": 0.9368501245531178, "learning_rate": 9.418434679371216e-06, "loss": 0.6125, "step": 2832 }, { "epoch": 0.36, "grad_norm": 1.033354267299191, "learning_rate": 9.417951722099275e-06, "loss": 0.6481, "step": 2833 }, { "epoch": 0.36, "grad_norm": 0.630607770788086, "learning_rate": 9.417468576769247e-06, "loss": 0.5658, "step": 2834 }, { "epoch": 0.36, "grad_norm": 1.4325380942899781, "learning_rate": 9.416985243401696e-06, "loss": 0.6096, "step": 2835 }, { "epoch": 0.36, "grad_norm": 0.5761440335248742, "learning_rate": 9.4165017220172e-06, "loss": 0.5246, "step": 2836 }, { "epoch": 0.36, "grad_norm": 0.7249543068030742, "learning_rate": 9.41601801263634e-06, "loss": 0.6412, "step": 2837 }, { "epoch": 0.36, "grad_norm": 0.9412920957801415, "learning_rate": 9.415534115279701e-06, "loss": 0.5744, "step": 2838 }, { "epoch": 0.36, "grad_norm": 0.7897584709082406, "learning_rate": 9.415050029967887e-06, "loss": 0.5695, "step": 2839 }, { "epoch": 0.36, "grad_norm": 1.0703647721946201, "learning_rate": 9.4145657567215e-06, "loss": 0.595, "step": 2840 }, { "epoch": 0.36, "grad_norm": 0.8092186795924167, "learning_rate": 9.414081295561157e-06, "loss": 0.6585, "step": 2841 }, { "epoch": 0.36, "grad_norm": 0.9069107351169117, "learning_rate": 9.41359664650748e-06, "loss": 0.639, "step": 2842 }, { "epoch": 0.36, "grad_norm": 0.7338329136891169, "learning_rate": 9.413111809581097e-06, "loss": 0.5871, "step": 2843 }, { "epoch": 0.36, "grad_norm": 0.645351721082941, "learning_rate": 9.412626784802646e-06, "loss": 0.5864, "step": 2844 }, { "epoch": 0.36, "grad_norm": 0.853078731366598, "learning_rate": 9.412141572192772e-06, "loss": 0.6837, "step": 2845 }, { "epoch": 0.36, "grad_norm": 0.5737539198299346, "learning_rate": 9.411656171772132e-06, "loss": 0.5264, "step": 2846 }, { "epoch": 0.36, "grad_norm": 0.8672541381913115, "learning_rate": 9.411170583561386e-06, "loss": 0.6583, "step": 2847 }, { "epoch": 0.36, "grad_norm": 0.8309777279979316, "learning_rate": 9.410684807581204e-06, "loss": 0.6165, "step": 2848 }, { "epoch": 0.36, "grad_norm": 0.5985005098021324, "learning_rate": 9.410198843852267e-06, "loss": 0.5366, "step": 2849 }, { "epoch": 0.36, "grad_norm": 0.9759883360897562, "learning_rate": 9.409712692395257e-06, "loss": 0.6922, "step": 2850 }, { "epoch": 0.36, "grad_norm": 1.0933014987638408, "learning_rate": 9.409226353230866e-06, "loss": 0.6349, "step": 2851 }, { "epoch": 0.36, "grad_norm": 0.6851806530435292, "learning_rate": 9.408739826379802e-06, "loss": 0.5324, "step": 2852 }, { "epoch": 0.36, "grad_norm": 0.6426987004506657, "learning_rate": 9.40825311186277e-06, "loss": 0.5423, "step": 2853 }, { "epoch": 0.36, "grad_norm": 0.7543805886760679, "learning_rate": 9.407766209700493e-06, "loss": 0.6085, "step": 2854 }, { "epoch": 0.36, "grad_norm": 0.6945186168163094, "learning_rate": 9.40727911991369e-06, "loss": 0.6244, "step": 2855 }, { "epoch": 0.36, "grad_norm": 0.9530437827842488, "learning_rate": 9.406791842523101e-06, "loss": 0.6679, "step": 2856 }, { "epoch": 0.36, "grad_norm": 0.6025847100072658, "learning_rate": 9.406304377549464e-06, "loss": 0.52, "step": 2857 }, { "epoch": 0.36, "grad_norm": 0.8686059097275766, "learning_rate": 9.40581672501353e-06, "loss": 0.6455, "step": 2858 }, { "epoch": 0.36, "grad_norm": 0.6122480572803359, "learning_rate": 9.405328884936058e-06, "loss": 0.5348, "step": 2859 }, { "epoch": 0.36, "grad_norm": 0.585461852705897, "learning_rate": 9.404840857337814e-06, "loss": 0.5403, "step": 2860 }, { "epoch": 0.36, "grad_norm": 0.5677296145455577, "learning_rate": 9.404352642239569e-06, "loss": 0.5262, "step": 2861 }, { "epoch": 0.36, "grad_norm": 1.081751345125273, "learning_rate": 9.403864239662106e-06, "loss": 0.6406, "step": 2862 }, { "epoch": 0.36, "grad_norm": 0.8885644008018558, "learning_rate": 9.403375649626214e-06, "loss": 0.6475, "step": 2863 }, { "epoch": 0.36, "grad_norm": 0.6353144235774352, "learning_rate": 9.402886872152695e-06, "loss": 0.5235, "step": 2864 }, { "epoch": 0.36, "grad_norm": 0.7409406000262078, "learning_rate": 9.402397907262348e-06, "loss": 0.6313, "step": 2865 }, { "epoch": 0.37, "grad_norm": 0.5830889351646802, "learning_rate": 9.401908754975993e-06, "loss": 0.5451, "step": 2866 }, { "epoch": 0.37, "grad_norm": 0.601798767248453, "learning_rate": 9.401419415314447e-06, "loss": 0.501, "step": 2867 }, { "epoch": 0.37, "grad_norm": 0.6403338198548815, "learning_rate": 9.400929888298542e-06, "loss": 0.583, "step": 2868 }, { "epoch": 0.37, "grad_norm": 0.6519778667822522, "learning_rate": 9.400440173949115e-06, "loss": 0.5455, "step": 2869 }, { "epoch": 0.37, "grad_norm": 0.616829522867513, "learning_rate": 9.399950272287011e-06, "loss": 0.6113, "step": 2870 }, { "epoch": 0.37, "grad_norm": 0.5984801817276588, "learning_rate": 9.399460183333084e-06, "loss": 0.4937, "step": 2871 }, { "epoch": 0.37, "grad_norm": 0.8405439718925305, "learning_rate": 9.398969907108198e-06, "loss": 0.6504, "step": 2872 }, { "epoch": 0.37, "grad_norm": 0.8285620852945064, "learning_rate": 9.398479443633217e-06, "loss": 0.5455, "step": 2873 }, { "epoch": 0.37, "grad_norm": 0.658413194806729, "learning_rate": 9.397988792929024e-06, "loss": 0.5504, "step": 2874 }, { "epoch": 0.37, "grad_norm": 0.779401416885058, "learning_rate": 9.397497955016502e-06, "loss": 0.5915, "step": 2875 }, { "epoch": 0.37, "grad_norm": 0.5954922739090105, "learning_rate": 9.397006929916542e-06, "loss": 0.5742, "step": 2876 }, { "epoch": 0.37, "grad_norm": 0.7908914462166622, "learning_rate": 9.39651571765005e-06, "loss": 0.6958, "step": 2877 }, { "epoch": 0.37, "grad_norm": 0.5842953799188918, "learning_rate": 9.396024318237932e-06, "loss": 0.5165, "step": 2878 }, { "epoch": 0.37, "grad_norm": 0.8778519407334405, "learning_rate": 9.395532731701106e-06, "loss": 0.6618, "step": 2879 }, { "epoch": 0.37, "grad_norm": 0.732685740487691, "learning_rate": 9.3950409580605e-06, "loss": 0.5762, "step": 2880 }, { "epoch": 0.37, "grad_norm": 0.6341810910406681, "learning_rate": 9.394548997337044e-06, "loss": 0.5098, "step": 2881 }, { "epoch": 0.37, "grad_norm": 0.6446350693674776, "learning_rate": 9.394056849551681e-06, "loss": 0.5658, "step": 2882 }, { "epoch": 0.37, "grad_norm": 0.6860208191364687, "learning_rate": 9.393564514725357e-06, "loss": 0.5522, "step": 2883 }, { "epoch": 0.37, "grad_norm": 0.573984716627783, "learning_rate": 9.393071992879033e-06, "loss": 0.5381, "step": 2884 }, { "epoch": 0.37, "grad_norm": 0.5983887150093357, "learning_rate": 9.392579284033672e-06, "loss": 0.5736, "step": 2885 }, { "epoch": 0.37, "grad_norm": 0.7597083521338649, "learning_rate": 9.392086388210248e-06, "loss": 0.6242, "step": 2886 }, { "epoch": 0.37, "grad_norm": 0.7071420000098009, "learning_rate": 9.391593305429741e-06, "loss": 0.5656, "step": 2887 }, { "epoch": 0.37, "grad_norm": 0.7808994541893899, "learning_rate": 9.39110003571314e-06, "loss": 0.6728, "step": 2888 }, { "epoch": 0.37, "grad_norm": 0.5811025956057759, "learning_rate": 9.390606579081445e-06, "loss": 0.5341, "step": 2889 }, { "epoch": 0.37, "grad_norm": 0.6992273447442522, "learning_rate": 9.390112935555655e-06, "loss": 0.577, "step": 2890 }, { "epoch": 0.37, "grad_norm": 0.7456521083506628, "learning_rate": 9.389619105156788e-06, "loss": 0.6125, "step": 2891 }, { "epoch": 0.37, "grad_norm": 0.7156156740151897, "learning_rate": 9.389125087905862e-06, "loss": 0.6062, "step": 2892 }, { "epoch": 0.37, "grad_norm": 0.884844055039022, "learning_rate": 9.388630883823909e-06, "loss": 0.5962, "step": 2893 }, { "epoch": 0.37, "grad_norm": 1.1687731001705837, "learning_rate": 9.38813649293196e-06, "loss": 0.6283, "step": 2894 }, { "epoch": 0.37, "grad_norm": 0.5924002278826536, "learning_rate": 9.387641915251065e-06, "loss": 0.5487, "step": 2895 }, { "epoch": 0.37, "grad_norm": 0.5738750677465778, "learning_rate": 9.387147150802274e-06, "loss": 0.589, "step": 2896 }, { "epoch": 0.37, "grad_norm": 0.7334098818311762, "learning_rate": 9.386652199606648e-06, "loss": 0.5495, "step": 2897 }, { "epoch": 0.37, "grad_norm": 0.6344325183000025, "learning_rate": 9.386157061685255e-06, "loss": 0.5708, "step": 2898 }, { "epoch": 0.37, "grad_norm": 0.7188129179715578, "learning_rate": 9.385661737059172e-06, "loss": 0.5859, "step": 2899 }, { "epoch": 0.37, "grad_norm": 0.5763663100946099, "learning_rate": 9.385166225749485e-06, "loss": 0.5492, "step": 2900 }, { "epoch": 0.37, "grad_norm": 0.7245777365379958, "learning_rate": 9.384670527777284e-06, "loss": 0.517, "step": 2901 }, { "epoch": 0.37, "grad_norm": 0.8367286295807743, "learning_rate": 9.38417464316367e-06, "loss": 0.6508, "step": 2902 }, { "epoch": 0.37, "grad_norm": 0.6331625799793746, "learning_rate": 9.38367857192975e-06, "loss": 0.5626, "step": 2903 }, { "epoch": 0.37, "grad_norm": 0.8974882912021842, "learning_rate": 9.383182314096643e-06, "loss": 0.6413, "step": 2904 }, { "epoch": 0.37, "grad_norm": 0.715344356395881, "learning_rate": 9.382685869685473e-06, "loss": 0.5415, "step": 2905 }, { "epoch": 0.37, "grad_norm": 0.7184999660613931, "learning_rate": 9.382189238717367e-06, "loss": 0.5578, "step": 2906 }, { "epoch": 0.37, "grad_norm": 0.6660457606478688, "learning_rate": 9.38169242121347e-06, "loss": 0.5613, "step": 2907 }, { "epoch": 0.37, "grad_norm": 0.7625046137579277, "learning_rate": 9.38119541719493e-06, "loss": 0.6153, "step": 2908 }, { "epoch": 0.37, "grad_norm": 0.6219015803012954, "learning_rate": 9.3806982266829e-06, "loss": 0.5723, "step": 2909 }, { "epoch": 0.37, "grad_norm": 0.6358396301577278, "learning_rate": 9.380200849698547e-06, "loss": 0.5039, "step": 2910 }, { "epoch": 0.37, "grad_norm": 0.8309198004924665, "learning_rate": 9.379703286263037e-06, "loss": 0.6091, "step": 2911 }, { "epoch": 0.37, "grad_norm": 0.7881318920771448, "learning_rate": 9.379205536397558e-06, "loss": 0.6043, "step": 2912 }, { "epoch": 0.37, "grad_norm": 0.658074606345979, "learning_rate": 9.378707600123292e-06, "loss": 0.5866, "step": 2913 }, { "epoch": 0.37, "grad_norm": 0.7781760241293016, "learning_rate": 9.378209477461435e-06, "loss": 0.6365, "step": 2914 }, { "epoch": 0.37, "grad_norm": 0.8019528651249476, "learning_rate": 9.37771116843319e-06, "loss": 0.6024, "step": 2915 }, { "epoch": 0.37, "grad_norm": 0.6137475171688824, "learning_rate": 9.377212673059771e-06, "loss": 0.5373, "step": 2916 }, { "epoch": 0.37, "grad_norm": 0.6083863516564999, "learning_rate": 9.376713991362396e-06, "loss": 0.5458, "step": 2917 }, { "epoch": 0.37, "grad_norm": 0.8613242340804598, "learning_rate": 9.376215123362293e-06, "loss": 0.6441, "step": 2918 }, { "epoch": 0.37, "grad_norm": 0.6604837134858038, "learning_rate": 9.375716069080696e-06, "loss": 0.5517, "step": 2919 }, { "epoch": 0.37, "grad_norm": 0.7052519738588503, "learning_rate": 9.375216828538849e-06, "loss": 0.5298, "step": 2920 }, { "epoch": 0.37, "grad_norm": 0.9483927267239565, "learning_rate": 9.374717401758001e-06, "loss": 0.6746, "step": 2921 }, { "epoch": 0.37, "grad_norm": 0.648908303677272, "learning_rate": 9.374217788759417e-06, "loss": 0.5798, "step": 2922 }, { "epoch": 0.37, "grad_norm": 0.6520297891395666, "learning_rate": 9.373717989564357e-06, "loss": 0.5101, "step": 2923 }, { "epoch": 0.37, "grad_norm": 0.6209113678896905, "learning_rate": 9.373218004194098e-06, "loss": 0.5482, "step": 2924 }, { "epoch": 0.37, "grad_norm": 0.6624328566805205, "learning_rate": 9.372717832669924e-06, "loss": 0.6084, "step": 2925 }, { "epoch": 0.37, "grad_norm": 0.6039012435279127, "learning_rate": 9.372217475013125e-06, "loss": 0.5126, "step": 2926 }, { "epoch": 0.37, "grad_norm": 0.6613123256570572, "learning_rate": 9.371716931245001e-06, "loss": 0.576, "step": 2927 }, { "epoch": 0.37, "grad_norm": 0.6287042674642637, "learning_rate": 9.371216201386855e-06, "loss": 0.5232, "step": 2928 }, { "epoch": 0.37, "grad_norm": 0.5753287882600551, "learning_rate": 9.370715285460006e-06, "loss": 0.5522, "step": 2929 }, { "epoch": 0.37, "grad_norm": 0.846231266735744, "learning_rate": 9.370214183485773e-06, "loss": 0.6236, "step": 2930 }, { "epoch": 0.37, "grad_norm": 0.7573771445645239, "learning_rate": 9.369712895485487e-06, "loss": 0.654, "step": 2931 }, { "epoch": 0.37, "grad_norm": 1.170271331259036, "learning_rate": 9.369211421480488e-06, "loss": 0.6071, "step": 2932 }, { "epoch": 0.37, "grad_norm": 0.6709961556183903, "learning_rate": 9.368709761492118e-06, "loss": 0.5738, "step": 2933 }, { "epoch": 0.37, "grad_norm": 0.6046082867343111, "learning_rate": 9.368207915541736e-06, "loss": 0.5495, "step": 2934 }, { "epoch": 0.37, "grad_norm": 1.1325040102684705, "learning_rate": 9.367705883650702e-06, "loss": 0.6025, "step": 2935 }, { "epoch": 0.37, "grad_norm": 0.7292815796694089, "learning_rate": 9.367203665840385e-06, "loss": 0.5687, "step": 2936 }, { "epoch": 0.37, "grad_norm": 0.9575243393849735, "learning_rate": 9.366701262132164e-06, "loss": 0.6827, "step": 2937 }, { "epoch": 0.37, "grad_norm": 0.6305451456666317, "learning_rate": 9.366198672547424e-06, "loss": 0.5839, "step": 2938 }, { "epoch": 0.37, "grad_norm": 0.8588962449405341, "learning_rate": 9.36569589710756e-06, "loss": 0.6266, "step": 2939 }, { "epoch": 0.37, "grad_norm": 0.589765890407199, "learning_rate": 9.365192935833972e-06, "loss": 0.5277, "step": 2940 }, { "epoch": 0.37, "grad_norm": 0.7664631674614872, "learning_rate": 9.364689788748068e-06, "loss": 0.6369, "step": 2941 }, { "epoch": 0.37, "grad_norm": 1.1335936449339294, "learning_rate": 9.36418645587127e-06, "loss": 0.7047, "step": 2942 }, { "epoch": 0.37, "grad_norm": 0.8685996226447723, "learning_rate": 9.363682937224997e-06, "loss": 0.6507, "step": 2943 }, { "epoch": 0.38, "grad_norm": 0.7300136339693422, "learning_rate": 9.363179232830688e-06, "loss": 0.5687, "step": 2944 }, { "epoch": 0.38, "grad_norm": 0.6508117148397068, "learning_rate": 9.362675342709782e-06, "loss": 0.5304, "step": 2945 }, { "epoch": 0.38, "grad_norm": 0.5731058920579397, "learning_rate": 9.362171266883728e-06, "loss": 0.5281, "step": 2946 }, { "epoch": 0.38, "grad_norm": 0.6330248233737237, "learning_rate": 9.36166700537398e-06, "loss": 0.6018, "step": 2947 }, { "epoch": 0.38, "grad_norm": 0.6931518987762789, "learning_rate": 9.361162558202009e-06, "loss": 0.5297, "step": 2948 }, { "epoch": 0.38, "grad_norm": 0.8090933159926346, "learning_rate": 9.360657925389283e-06, "loss": 0.6245, "step": 2949 }, { "epoch": 0.38, "grad_norm": 0.9081573626819215, "learning_rate": 9.360153106957283e-06, "loss": 0.6553, "step": 2950 }, { "epoch": 0.38, "grad_norm": 0.6089575980777817, "learning_rate": 9.359648102927499e-06, "loss": 0.5723, "step": 2951 }, { "epoch": 0.38, "grad_norm": 0.8969338297373163, "learning_rate": 9.359142913321427e-06, "loss": 0.6536, "step": 2952 }, { "epoch": 0.38, "grad_norm": 0.6081021581080863, "learning_rate": 9.358637538160569e-06, "loss": 0.5302, "step": 2953 }, { "epoch": 0.38, "grad_norm": 0.5845986707871152, "learning_rate": 9.35813197746644e-06, "loss": 0.5377, "step": 2954 }, { "epoch": 0.38, "grad_norm": 0.7618249368956932, "learning_rate": 9.357626231260562e-06, "loss": 0.5494, "step": 2955 }, { "epoch": 0.38, "grad_norm": 0.7053833506407878, "learning_rate": 9.357120299564457e-06, "loss": 0.5632, "step": 2956 }, { "epoch": 0.38, "grad_norm": 0.7454397296241266, "learning_rate": 9.356614182399666e-06, "loss": 0.5882, "step": 2957 }, { "epoch": 0.38, "grad_norm": 0.6079229664082009, "learning_rate": 9.356107879787731e-06, "loss": 0.5482, "step": 2958 }, { "epoch": 0.38, "grad_norm": 0.8058099202927756, "learning_rate": 9.355601391750202e-06, "loss": 0.5491, "step": 2959 }, { "epoch": 0.38, "grad_norm": 0.8408124520543254, "learning_rate": 9.355094718308642e-06, "loss": 0.7278, "step": 2960 }, { "epoch": 0.38, "grad_norm": 0.8638475470427798, "learning_rate": 9.354587859484616e-06, "loss": 0.6491, "step": 2961 }, { "epoch": 0.38, "grad_norm": 0.9616312733333742, "learning_rate": 9.3540808152997e-06, "loss": 0.6151, "step": 2962 }, { "epoch": 0.38, "grad_norm": 0.9157548569921015, "learning_rate": 9.353573585775478e-06, "loss": 0.6686, "step": 2963 }, { "epoch": 0.38, "grad_norm": 0.7716073816140209, "learning_rate": 9.35306617093354e-06, "loss": 0.616, "step": 2964 }, { "epoch": 0.38, "grad_norm": 0.7078847840826726, "learning_rate": 9.352558570795485e-06, "loss": 0.5714, "step": 2965 }, { "epoch": 0.38, "grad_norm": 0.6285416812475433, "learning_rate": 9.35205078538292e-06, "loss": 0.5332, "step": 2966 }, { "epoch": 0.38, "grad_norm": 0.738607843354742, "learning_rate": 9.351542814717463e-06, "loss": 0.6244, "step": 2967 }, { "epoch": 0.38, "grad_norm": 0.7025995652133761, "learning_rate": 9.351034658820731e-06, "loss": 0.5678, "step": 2968 }, { "epoch": 0.38, "grad_norm": 0.6454152210839516, "learning_rate": 9.35052631771436e-06, "loss": 0.5515, "step": 2969 }, { "epoch": 0.38, "grad_norm": 2.2417818654827886, "learning_rate": 9.350017791419983e-06, "loss": 0.6563, "step": 2970 }, { "epoch": 0.38, "grad_norm": 0.977732183414922, "learning_rate": 9.349509079959252e-06, "loss": 0.5975, "step": 2971 }, { "epoch": 0.38, "grad_norm": 0.7946625188117596, "learning_rate": 9.349000183353817e-06, "loss": 0.6598, "step": 2972 }, { "epoch": 0.38, "grad_norm": 1.3933140392024956, "learning_rate": 9.348491101625342e-06, "loss": 0.6087, "step": 2973 }, { "epoch": 0.38, "grad_norm": 0.63265142918763, "learning_rate": 9.347981834795497e-06, "loss": 0.5641, "step": 2974 }, { "epoch": 0.38, "grad_norm": 0.8515643184144605, "learning_rate": 9.347472382885958e-06, "loss": 0.5925, "step": 2975 }, { "epoch": 0.38, "grad_norm": 0.8477336400377684, "learning_rate": 9.346962745918413e-06, "loss": 0.6146, "step": 2976 }, { "epoch": 0.38, "grad_norm": 0.603407537780216, "learning_rate": 9.346452923914555e-06, "loss": 0.4649, "step": 2977 }, { "epoch": 0.38, "grad_norm": 0.6604501094374994, "learning_rate": 9.345942916896087e-06, "loss": 0.5437, "step": 2978 }, { "epoch": 0.38, "grad_norm": 0.694988307245293, "learning_rate": 9.345432724884714e-06, "loss": 0.585, "step": 2979 }, { "epoch": 0.38, "grad_norm": 0.7034293684780129, "learning_rate": 9.344922347902157e-06, "loss": 0.5716, "step": 2980 }, { "epoch": 0.38, "grad_norm": 0.7991747189698661, "learning_rate": 9.34441178597014e-06, "loss": 0.5782, "step": 2981 }, { "epoch": 0.38, "grad_norm": 2.4727008222887323, "learning_rate": 9.343901039110396e-06, "loss": 0.6948, "step": 2982 }, { "epoch": 0.38, "grad_norm": 0.9349853240298963, "learning_rate": 9.343390107344665e-06, "loss": 0.6253, "step": 2983 }, { "epoch": 0.38, "grad_norm": 0.6167534352333365, "learning_rate": 9.342878990694698e-06, "loss": 0.587, "step": 2984 }, { "epoch": 0.38, "grad_norm": 0.7958548637169885, "learning_rate": 9.34236768918225e-06, "loss": 0.637, "step": 2985 }, { "epoch": 0.38, "grad_norm": 0.7553547332723676, "learning_rate": 9.341856202829086e-06, "loss": 0.6235, "step": 2986 }, { "epoch": 0.38, "grad_norm": 0.6116580339787628, "learning_rate": 9.341344531656977e-06, "loss": 0.5701, "step": 2987 }, { "epoch": 0.38, "grad_norm": 0.8274743915536293, "learning_rate": 9.340832675687706e-06, "loss": 0.6359, "step": 2988 }, { "epoch": 0.38, "grad_norm": 0.5895476890248806, "learning_rate": 9.340320634943057e-06, "loss": 0.4841, "step": 2989 }, { "epoch": 0.38, "grad_norm": 0.7026001758067324, "learning_rate": 9.339808409444829e-06, "loss": 0.5757, "step": 2990 }, { "epoch": 0.38, "grad_norm": 0.6487806828137881, "learning_rate": 9.339295999214825e-06, "loss": 0.521, "step": 2991 }, { "epoch": 0.38, "grad_norm": 0.5982423403557943, "learning_rate": 9.338783404274858e-06, "loss": 0.5509, "step": 2992 }, { "epoch": 0.38, "grad_norm": 0.8143840407930086, "learning_rate": 9.338270624646745e-06, "loss": 0.6016, "step": 2993 }, { "epoch": 0.38, "grad_norm": 0.8278309078279198, "learning_rate": 9.337757660352315e-06, "loss": 0.6428, "step": 2994 }, { "epoch": 0.38, "grad_norm": 0.6076523701640529, "learning_rate": 9.337244511413402e-06, "loss": 0.5534, "step": 2995 }, { "epoch": 0.38, "grad_norm": 0.9330251158033744, "learning_rate": 9.336731177851852e-06, "loss": 0.6081, "step": 2996 }, { "epoch": 0.38, "grad_norm": 0.8346508394264712, "learning_rate": 9.336217659689512e-06, "loss": 0.6167, "step": 2997 }, { "epoch": 0.38, "grad_norm": 0.742525535161732, "learning_rate": 9.335703956948243e-06, "loss": 0.5543, "step": 2998 }, { "epoch": 0.38, "grad_norm": 0.7866807279632302, "learning_rate": 9.335190069649913e-06, "loss": 0.5509, "step": 2999 }, { "epoch": 0.38, "grad_norm": 0.6825555935104467, "learning_rate": 9.334675997816393e-06, "loss": 0.5665, "step": 3000 }, { "epoch": 0.38, "grad_norm": 0.7496473110772021, "learning_rate": 9.33416174146957e-06, "loss": 0.6646, "step": 3001 }, { "epoch": 0.38, "grad_norm": 0.6414747458468472, "learning_rate": 9.33364730063133e-06, "loss": 0.5835, "step": 3002 }, { "epoch": 0.38, "grad_norm": 0.7642314919294045, "learning_rate": 9.333132675323573e-06, "loss": 0.6587, "step": 3003 }, { "epoch": 0.38, "grad_norm": 0.6151487272763504, "learning_rate": 9.332617865568204e-06, "loss": 0.5578, "step": 3004 }, { "epoch": 0.38, "grad_norm": 0.8710684662555659, "learning_rate": 9.332102871387139e-06, "loss": 0.5876, "step": 3005 }, { "epoch": 0.38, "grad_norm": 0.6427377041504402, "learning_rate": 9.331587692802298e-06, "loss": 0.5182, "step": 3006 }, { "epoch": 0.38, "grad_norm": 0.7158472416463113, "learning_rate": 9.331072329835608e-06, "loss": 0.5122, "step": 3007 }, { "epoch": 0.38, "grad_norm": 0.7418179663545695, "learning_rate": 9.330556782509013e-06, "loss": 0.6371, "step": 3008 }, { "epoch": 0.38, "grad_norm": 0.5572898008872351, "learning_rate": 9.330041050844451e-06, "loss": 0.5122, "step": 3009 }, { "epoch": 0.38, "grad_norm": 0.7713518982989871, "learning_rate": 9.329525134863879e-06, "loss": 0.6413, "step": 3010 }, { "epoch": 0.38, "grad_norm": 0.6819405797409833, "learning_rate": 9.329009034589259e-06, "loss": 0.553, "step": 3011 }, { "epoch": 0.38, "grad_norm": 0.9541471572663822, "learning_rate": 9.328492750042557e-06, "loss": 0.6898, "step": 3012 }, { "epoch": 0.38, "grad_norm": 0.7001677216180435, "learning_rate": 9.327976281245749e-06, "loss": 0.5193, "step": 3013 }, { "epoch": 0.38, "grad_norm": 0.696208988090597, "learning_rate": 9.327459628220823e-06, "loss": 0.547, "step": 3014 }, { "epoch": 0.38, "grad_norm": 0.8183328967029198, "learning_rate": 9.326942790989768e-06, "loss": 0.6224, "step": 3015 }, { "epoch": 0.38, "grad_norm": 0.8003448604103915, "learning_rate": 9.326425769574586e-06, "loss": 0.5968, "step": 3016 }, { "epoch": 0.38, "grad_norm": 0.7645706263357354, "learning_rate": 9.325908563997284e-06, "loss": 0.5375, "step": 3017 }, { "epoch": 0.38, "grad_norm": 0.6388165494741231, "learning_rate": 9.32539117427988e-06, "loss": 0.5432, "step": 3018 }, { "epoch": 0.38, "grad_norm": 0.6579376954107429, "learning_rate": 9.324873600444393e-06, "loss": 0.5294, "step": 3019 }, { "epoch": 0.38, "grad_norm": 0.5645629811200503, "learning_rate": 9.324355842512858e-06, "loss": 0.5076, "step": 3020 }, { "epoch": 0.38, "grad_norm": 0.7229755582677653, "learning_rate": 9.323837900507313e-06, "loss": 0.5746, "step": 3021 }, { "epoch": 0.38, "grad_norm": 0.790481285180887, "learning_rate": 9.323319774449806e-06, "loss": 0.6435, "step": 3022 }, { "epoch": 0.39, "grad_norm": 0.691859607482006, "learning_rate": 9.32280146436239e-06, "loss": 0.6124, "step": 3023 }, { "epoch": 0.39, "grad_norm": 0.626196159201232, "learning_rate": 9.322282970267132e-06, "loss": 0.5344, "step": 3024 }, { "epoch": 0.39, "grad_norm": 0.8321946396502785, "learning_rate": 9.321764292186098e-06, "loss": 0.6125, "step": 3025 }, { "epoch": 0.39, "grad_norm": 0.6642325621002598, "learning_rate": 9.32124543014137e-06, "loss": 0.5373, "step": 3026 }, { "epoch": 0.39, "grad_norm": 1.2194842676981374, "learning_rate": 9.320726384155032e-06, "loss": 0.6215, "step": 3027 }, { "epoch": 0.39, "grad_norm": 0.6319056284270012, "learning_rate": 9.320207154249179e-06, "loss": 0.5524, "step": 3028 }, { "epoch": 0.39, "grad_norm": 0.6057130280445902, "learning_rate": 9.319687740445914e-06, "loss": 0.5426, "step": 3029 }, { "epoch": 0.39, "grad_norm": 0.8053863670189406, "learning_rate": 9.319168142767344e-06, "loss": 0.6014, "step": 3030 }, { "epoch": 0.39, "grad_norm": 0.8536366098854649, "learning_rate": 9.318648361235588e-06, "loss": 0.5412, "step": 3031 }, { "epoch": 0.39, "grad_norm": 1.1209552390347748, "learning_rate": 9.318128395872772e-06, "loss": 0.6431, "step": 3032 }, { "epoch": 0.39, "grad_norm": 0.5805310978027239, "learning_rate": 9.31760824670103e-06, "loss": 0.5322, "step": 3033 }, { "epoch": 0.39, "grad_norm": 0.7922086275610772, "learning_rate": 9.317087913742501e-06, "loss": 0.5947, "step": 3034 }, { "epoch": 0.39, "grad_norm": 0.7086138941111222, "learning_rate": 9.316567397019336e-06, "loss": 0.4977, "step": 3035 }, { "epoch": 0.39, "grad_norm": 0.8280261185336169, "learning_rate": 9.31604669655369e-06, "loss": 0.639, "step": 3036 }, { "epoch": 0.39, "grad_norm": 0.901513560973976, "learning_rate": 9.315525812367728e-06, "loss": 0.6625, "step": 3037 }, { "epoch": 0.39, "grad_norm": 0.6683418740981407, "learning_rate": 9.315004744483623e-06, "loss": 0.5734, "step": 3038 }, { "epoch": 0.39, "grad_norm": 0.5955479809766612, "learning_rate": 9.314483492923555e-06, "loss": 0.569, "step": 3039 }, { "epoch": 0.39, "grad_norm": 0.7098366300768894, "learning_rate": 9.313962057709712e-06, "loss": 0.5087, "step": 3040 }, { "epoch": 0.39, "grad_norm": 0.6644309826988876, "learning_rate": 9.313440438864288e-06, "loss": 0.5233, "step": 3041 }, { "epoch": 0.39, "grad_norm": 0.9052441480315545, "learning_rate": 9.312918636409488e-06, "loss": 0.5835, "step": 3042 }, { "epoch": 0.39, "grad_norm": 0.7402182307744731, "learning_rate": 9.312396650367528e-06, "loss": 0.6059, "step": 3043 }, { "epoch": 0.39, "grad_norm": 0.5881936680543857, "learning_rate": 9.311874480760619e-06, "loss": 0.4582, "step": 3044 }, { "epoch": 0.39, "grad_norm": 0.7831489203537119, "learning_rate": 9.311352127610995e-06, "loss": 0.648, "step": 3045 }, { "epoch": 0.39, "grad_norm": 0.5943426347788903, "learning_rate": 9.310829590940886e-06, "loss": 0.5038, "step": 3046 }, { "epoch": 0.39, "grad_norm": 0.6626827551503282, "learning_rate": 9.310306870772536e-06, "loss": 0.6143, "step": 3047 }, { "epoch": 0.39, "grad_norm": 0.6808381714363823, "learning_rate": 9.3097839671282e-06, "loss": 0.5389, "step": 3048 }, { "epoch": 0.39, "grad_norm": 0.7441243592991102, "learning_rate": 9.309260880030128e-06, "loss": 0.5767, "step": 3049 }, { "epoch": 0.39, "grad_norm": 0.6852853431179967, "learning_rate": 9.308737609500593e-06, "loss": 0.5732, "step": 3050 }, { "epoch": 0.39, "grad_norm": 0.8530760941007216, "learning_rate": 9.308214155561866e-06, "loss": 0.6467, "step": 3051 }, { "epoch": 0.39, "grad_norm": 0.6322104196931244, "learning_rate": 9.30769051823623e-06, "loss": 0.4893, "step": 3052 }, { "epoch": 0.39, "grad_norm": 1.1032182049391803, "learning_rate": 9.307166697545976e-06, "loss": 0.6418, "step": 3053 }, { "epoch": 0.39, "grad_norm": 0.546218845958658, "learning_rate": 9.306642693513397e-06, "loss": 0.5108, "step": 3054 }, { "epoch": 0.39, "grad_norm": 0.5662008064598786, "learning_rate": 9.3061185061608e-06, "loss": 0.5168, "step": 3055 }, { "epoch": 0.39, "grad_norm": 0.7800336009647092, "learning_rate": 9.3055941355105e-06, "loss": 0.6175, "step": 3056 }, { "epoch": 0.39, "grad_norm": 0.7333866231083132, "learning_rate": 9.305069581584816e-06, "loss": 0.5589, "step": 3057 }, { "epoch": 0.39, "grad_norm": 0.6769398353688174, "learning_rate": 9.304544844406077e-06, "loss": 0.5824, "step": 3058 }, { "epoch": 0.39, "grad_norm": 0.6581380436974328, "learning_rate": 9.304019923996619e-06, "loss": 0.5922, "step": 3059 }, { "epoch": 0.39, "grad_norm": 0.6768552942287963, "learning_rate": 9.303494820378787e-06, "loss": 0.6227, "step": 3060 }, { "epoch": 0.39, "grad_norm": 0.6824732779233269, "learning_rate": 9.302969533574933e-06, "loss": 0.5432, "step": 3061 }, { "epoch": 0.39, "grad_norm": 0.9017951473668746, "learning_rate": 9.302444063607415e-06, "loss": 0.6175, "step": 3062 }, { "epoch": 0.39, "grad_norm": 0.9157586394484665, "learning_rate": 9.301918410498603e-06, "loss": 0.6712, "step": 3063 }, { "epoch": 0.39, "grad_norm": 0.7766878050601725, "learning_rate": 9.301392574270871e-06, "loss": 0.6945, "step": 3064 }, { "epoch": 0.39, "grad_norm": 0.6717405829253689, "learning_rate": 9.300866554946601e-06, "loss": 0.5873, "step": 3065 }, { "epoch": 0.39, "grad_norm": 0.7471310858999942, "learning_rate": 9.300340352548187e-06, "loss": 0.5998, "step": 3066 }, { "epoch": 0.39, "grad_norm": 0.7138892208972915, "learning_rate": 9.299813967098025e-06, "loss": 0.5422, "step": 3067 }, { "epoch": 0.39, "grad_norm": 0.7148621043066545, "learning_rate": 9.299287398618523e-06, "loss": 0.5343, "step": 3068 }, { "epoch": 0.39, "grad_norm": 0.5276683100930166, "learning_rate": 9.298760647132096e-06, "loss": 0.4901, "step": 3069 }, { "epoch": 0.39, "grad_norm": 0.6157281448589456, "learning_rate": 9.298233712661166e-06, "loss": 0.5509, "step": 3070 }, { "epoch": 0.39, "grad_norm": 0.5890621180780182, "learning_rate": 9.29770659522816e-06, "loss": 0.536, "step": 3071 }, { "epoch": 0.39, "grad_norm": 0.5949375071132643, "learning_rate": 9.297179294855519e-06, "loss": 0.5854, "step": 3072 }, { "epoch": 0.39, "grad_norm": 0.7321215936111286, "learning_rate": 9.296651811565685e-06, "loss": 0.6554, "step": 3073 }, { "epoch": 0.39, "grad_norm": 0.6632960728330101, "learning_rate": 9.296124145381116e-06, "loss": 0.5441, "step": 3074 }, { "epoch": 0.39, "grad_norm": 0.8927654358690954, "learning_rate": 9.295596296324268e-06, "loss": 0.5433, "step": 3075 }, { "epoch": 0.39, "grad_norm": 0.5917597025145906, "learning_rate": 9.295068264417615e-06, "loss": 0.5124, "step": 3076 }, { "epoch": 0.39, "grad_norm": 0.8730540277709486, "learning_rate": 9.294540049683629e-06, "loss": 0.6292, "step": 3077 }, { "epoch": 0.39, "grad_norm": 0.76195088902274, "learning_rate": 9.2940116521448e-06, "loss": 0.6291, "step": 3078 }, { "epoch": 0.39, "grad_norm": 0.7144567401257371, "learning_rate": 9.293483071823612e-06, "loss": 0.5584, "step": 3079 }, { "epoch": 0.39, "grad_norm": 0.8238836202252123, "learning_rate": 9.292954308742572e-06, "loss": 0.6001, "step": 3080 }, { "epoch": 0.39, "grad_norm": 0.5557593549520737, "learning_rate": 9.292425362924185e-06, "loss": 0.5021, "step": 3081 }, { "epoch": 0.39, "grad_norm": 0.6445106737516632, "learning_rate": 9.291896234390966e-06, "loss": 0.5657, "step": 3082 }, { "epoch": 0.39, "grad_norm": 0.7875561067619063, "learning_rate": 9.291366923165442e-06, "loss": 0.562, "step": 3083 }, { "epoch": 0.39, "grad_norm": 0.5296996203046099, "learning_rate": 9.290837429270138e-06, "loss": 0.4997, "step": 3084 }, { "epoch": 0.39, "grad_norm": 0.7394728089933901, "learning_rate": 9.290307752727598e-06, "loss": 0.5575, "step": 3085 }, { "epoch": 0.39, "grad_norm": 0.712067102744521, "learning_rate": 9.289777893560368e-06, "loss": 0.5479, "step": 3086 }, { "epoch": 0.39, "grad_norm": 0.7463792735340825, "learning_rate": 9.289247851791e-06, "loss": 0.5827, "step": 3087 }, { "epoch": 0.39, "grad_norm": 0.6464882057670299, "learning_rate": 9.288717627442056e-06, "loss": 0.571, "step": 3088 }, { "epoch": 0.39, "grad_norm": 0.8399134938837616, "learning_rate": 9.288187220536108e-06, "loss": 0.6561, "step": 3089 }, { "epoch": 0.39, "grad_norm": 0.6463657899447613, "learning_rate": 9.287656631095737e-06, "loss": 0.554, "step": 3090 }, { "epoch": 0.39, "grad_norm": 0.6640107311547536, "learning_rate": 9.287125859143519e-06, "loss": 0.5583, "step": 3091 }, { "epoch": 0.39, "grad_norm": 0.6834728724945228, "learning_rate": 9.286594904702056e-06, "loss": 0.5647, "step": 3092 }, { "epoch": 0.39, "grad_norm": 0.7438160033033764, "learning_rate": 9.286063767793946e-06, "loss": 0.5557, "step": 3093 }, { "epoch": 0.39, "grad_norm": 0.7879538753250512, "learning_rate": 9.285532448441796e-06, "loss": 0.6375, "step": 3094 }, { "epoch": 0.39, "grad_norm": 0.5826151051188484, "learning_rate": 9.285000946668225e-06, "loss": 0.5366, "step": 3095 }, { "epoch": 0.39, "grad_norm": 0.8468259040440832, "learning_rate": 9.284469262495858e-06, "loss": 0.6204, "step": 3096 }, { "epoch": 0.39, "grad_norm": 0.8301424864486188, "learning_rate": 9.283937395947324e-06, "loss": 0.545, "step": 3097 }, { "epoch": 0.39, "grad_norm": 0.5769045292234983, "learning_rate": 9.283405347045265e-06, "loss": 0.5145, "step": 3098 }, { "epoch": 0.39, "grad_norm": 0.7910485700784748, "learning_rate": 9.282873115812328e-06, "loss": 0.6243, "step": 3099 }, { "epoch": 0.39, "grad_norm": 0.7994343145544429, "learning_rate": 9.28234070227117e-06, "loss": 0.6042, "step": 3100 }, { "epoch": 0.4, "grad_norm": 0.7675790930144175, "learning_rate": 9.281808106444452e-06, "loss": 0.5719, "step": 3101 }, { "epoch": 0.4, "grad_norm": 0.7582156493616083, "learning_rate": 9.281275328354845e-06, "loss": 0.6003, "step": 3102 }, { "epoch": 0.4, "grad_norm": 0.5871920321578749, "learning_rate": 9.280742368025027e-06, "loss": 0.575, "step": 3103 }, { "epoch": 0.4, "grad_norm": 0.5610931006296737, "learning_rate": 9.280209225477688e-06, "loss": 0.5215, "step": 3104 }, { "epoch": 0.4, "grad_norm": 0.7206658624226153, "learning_rate": 9.279675900735519e-06, "loss": 0.6207, "step": 3105 }, { "epoch": 0.4, "grad_norm": 0.6770833910428605, "learning_rate": 9.279142393821224e-06, "loss": 0.5367, "step": 3106 }, { "epoch": 0.4, "grad_norm": 0.8784298211966007, "learning_rate": 9.278608704757509e-06, "loss": 0.6006, "step": 3107 }, { "epoch": 0.4, "grad_norm": 1.0511083295817143, "learning_rate": 9.278074833567094e-06, "loss": 0.635, "step": 3108 }, { "epoch": 0.4, "grad_norm": 0.7725134460781607, "learning_rate": 9.277540780272707e-06, "loss": 0.66, "step": 3109 }, { "epoch": 0.4, "grad_norm": 0.9074875949708152, "learning_rate": 9.277006544897075e-06, "loss": 0.6412, "step": 3110 }, { "epoch": 0.4, "grad_norm": 0.5747213986561281, "learning_rate": 9.276472127462943e-06, "loss": 0.5407, "step": 3111 }, { "epoch": 0.4, "grad_norm": 0.6084753742855469, "learning_rate": 9.275937527993058e-06, "loss": 0.5673, "step": 3112 }, { "epoch": 0.4, "grad_norm": 0.7364418087309035, "learning_rate": 9.275402746510179e-06, "loss": 0.5565, "step": 3113 }, { "epoch": 0.4, "grad_norm": 0.6074742736840726, "learning_rate": 9.274867783037063e-06, "loss": 0.4966, "step": 3114 }, { "epoch": 0.4, "grad_norm": 0.7712402099091962, "learning_rate": 9.274332637596488e-06, "loss": 0.6442, "step": 3115 }, { "epoch": 0.4, "grad_norm": 0.5292188506702172, "learning_rate": 9.273797310211232e-06, "loss": 0.4742, "step": 3116 }, { "epoch": 0.4, "grad_norm": 0.7506883236759608, "learning_rate": 9.273261800904082e-06, "loss": 0.5994, "step": 3117 }, { "epoch": 0.4, "grad_norm": 0.8367115217354989, "learning_rate": 9.272726109697829e-06, "loss": 0.6219, "step": 3118 }, { "epoch": 0.4, "grad_norm": 0.6130562197922128, "learning_rate": 9.272190236615283e-06, "loss": 0.4886, "step": 3119 }, { "epoch": 0.4, "grad_norm": 0.6482109192490533, "learning_rate": 9.271654181679249e-06, "loss": 0.5192, "step": 3120 }, { "epoch": 0.4, "grad_norm": 0.6525488995115156, "learning_rate": 9.271117944912546e-06, "loss": 0.5351, "step": 3121 }, { "epoch": 0.4, "grad_norm": 0.8013882323655882, "learning_rate": 9.270581526338003e-06, "loss": 0.5881, "step": 3122 }, { "epoch": 0.4, "grad_norm": 0.8160372182766578, "learning_rate": 9.27004492597845e-06, "loss": 0.6463, "step": 3123 }, { "epoch": 0.4, "grad_norm": 0.7607549688946318, "learning_rate": 9.269508143856728e-06, "loss": 0.6309, "step": 3124 }, { "epoch": 0.4, "grad_norm": 0.7238568313130813, "learning_rate": 9.26897117999569e-06, "loss": 0.6006, "step": 3125 }, { "epoch": 0.4, "grad_norm": 0.6115375378503345, "learning_rate": 9.26843403441819e-06, "loss": 0.5531, "step": 3126 }, { "epoch": 0.4, "grad_norm": 0.9119037048165477, "learning_rate": 9.267896707147093e-06, "loss": 0.6225, "step": 3127 }, { "epoch": 0.4, "grad_norm": 0.6926218128611051, "learning_rate": 9.26735919820527e-06, "loss": 0.5986, "step": 3128 }, { "epoch": 0.4, "grad_norm": 0.8324995338310999, "learning_rate": 9.266821507615605e-06, "loss": 0.6328, "step": 3129 }, { "epoch": 0.4, "grad_norm": 0.8445941146845337, "learning_rate": 9.266283635400981e-06, "loss": 0.6779, "step": 3130 }, { "epoch": 0.4, "grad_norm": 0.6201031681120114, "learning_rate": 9.265745581584297e-06, "loss": 0.5428, "step": 3131 }, { "epoch": 0.4, "grad_norm": 0.8656280762912113, "learning_rate": 9.265207346188456e-06, "loss": 0.6874, "step": 3132 }, { "epoch": 0.4, "grad_norm": 1.8283757591201077, "learning_rate": 9.264668929236365e-06, "loss": 0.5856, "step": 3133 }, { "epoch": 0.4, "grad_norm": 0.5957631667069805, "learning_rate": 9.264130330750949e-06, "loss": 0.5381, "step": 3134 }, { "epoch": 0.4, "grad_norm": 0.7690889525596007, "learning_rate": 9.26359155075513e-06, "loss": 0.6318, "step": 3135 }, { "epoch": 0.4, "grad_norm": 0.7993677063373822, "learning_rate": 9.263052589271841e-06, "loss": 0.6209, "step": 3136 }, { "epoch": 0.4, "grad_norm": 0.6530563933064154, "learning_rate": 9.262513446324027e-06, "loss": 0.5409, "step": 3137 }, { "epoch": 0.4, "grad_norm": 0.9396208535520592, "learning_rate": 9.261974121934637e-06, "loss": 0.6181, "step": 3138 }, { "epoch": 0.4, "grad_norm": 0.5817774325248966, "learning_rate": 9.261434616126628e-06, "loss": 0.5174, "step": 3139 }, { "epoch": 0.4, "grad_norm": 0.8179224407841669, "learning_rate": 9.260894928922966e-06, "loss": 0.5769, "step": 3140 }, { "epoch": 0.4, "grad_norm": 0.6675953013812651, "learning_rate": 9.260355060346622e-06, "loss": 0.5401, "step": 3141 }, { "epoch": 0.4, "grad_norm": 0.6047063955764763, "learning_rate": 9.259815010420577e-06, "loss": 0.5212, "step": 3142 }, { "epoch": 0.4, "grad_norm": 0.8137771625454581, "learning_rate": 9.259274779167819e-06, "loss": 0.5973, "step": 3143 }, { "epoch": 0.4, "grad_norm": 0.5891073724621664, "learning_rate": 9.258734366611345e-06, "loss": 0.4764, "step": 3144 }, { "epoch": 0.4, "grad_norm": 0.6936274196117064, "learning_rate": 9.258193772774158e-06, "loss": 0.5844, "step": 3145 }, { "epoch": 0.4, "grad_norm": 0.6005108952985638, "learning_rate": 9.257652997679268e-06, "loss": 0.5612, "step": 3146 }, { "epoch": 0.4, "grad_norm": 0.677342080236454, "learning_rate": 9.257112041349697e-06, "loss": 0.547, "step": 3147 }, { "epoch": 0.4, "grad_norm": 0.6082332538703317, "learning_rate": 9.25657090380847e-06, "loss": 0.5582, "step": 3148 }, { "epoch": 0.4, "grad_norm": 0.8679094395986775, "learning_rate": 9.256029585078622e-06, "loss": 0.6566, "step": 3149 }, { "epoch": 0.4, "grad_norm": 0.6503681281399057, "learning_rate": 9.255488085183193e-06, "loss": 0.5036, "step": 3150 }, { "epoch": 0.4, "grad_norm": 0.7907090344306829, "learning_rate": 9.254946404145236e-06, "loss": 0.6045, "step": 3151 }, { "epoch": 0.4, "grad_norm": 0.8024687216639622, "learning_rate": 9.254404541987807e-06, "loss": 0.645, "step": 3152 }, { "epoch": 0.4, "grad_norm": 0.5642301377950192, "learning_rate": 9.253862498733972e-06, "loss": 0.535, "step": 3153 }, { "epoch": 0.4, "grad_norm": 0.7766938738624718, "learning_rate": 9.253320274406806e-06, "loss": 0.5819, "step": 3154 }, { "epoch": 0.4, "grad_norm": 0.6637008314505372, "learning_rate": 9.252777869029386e-06, "loss": 0.5625, "step": 3155 }, { "epoch": 0.4, "grad_norm": 0.7747869499782861, "learning_rate": 9.252235282624801e-06, "loss": 0.594, "step": 3156 }, { "epoch": 0.4, "grad_norm": 0.6668475851783979, "learning_rate": 9.251692515216148e-06, "loss": 0.5273, "step": 3157 }, { "epoch": 0.4, "grad_norm": 0.8544566294686278, "learning_rate": 9.251149566826533e-06, "loss": 0.5744, "step": 3158 }, { "epoch": 0.4, "grad_norm": 0.781423719362171, "learning_rate": 9.250606437479064e-06, "loss": 0.5431, "step": 3159 }, { "epoch": 0.4, "grad_norm": 0.6466778267197053, "learning_rate": 9.250063127196863e-06, "loss": 0.5671, "step": 3160 }, { "epoch": 0.4, "grad_norm": 0.7482778024640498, "learning_rate": 9.249519636003056e-06, "loss": 0.5943, "step": 3161 }, { "epoch": 0.4, "grad_norm": 0.6519006468416555, "learning_rate": 9.248975963920777e-06, "loss": 0.549, "step": 3162 }, { "epoch": 0.4, "grad_norm": 0.6622251030234204, "learning_rate": 9.24843211097317e-06, "loss": 0.5245, "step": 3163 }, { "epoch": 0.4, "grad_norm": 0.7375639692494137, "learning_rate": 9.247888077183382e-06, "loss": 0.5534, "step": 3164 }, { "epoch": 0.4, "grad_norm": 0.7985924562798349, "learning_rate": 9.247343862574575e-06, "loss": 0.6107, "step": 3165 }, { "epoch": 0.4, "grad_norm": 0.5996574487719852, "learning_rate": 9.24679946716991e-06, "loss": 0.4911, "step": 3166 }, { "epoch": 0.4, "grad_norm": 0.5761423746533978, "learning_rate": 9.246254890992565e-06, "loss": 0.5138, "step": 3167 }, { "epoch": 0.4, "grad_norm": 0.7984236349512767, "learning_rate": 9.245710134065718e-06, "loss": 0.6416, "step": 3168 }, { "epoch": 0.4, "grad_norm": 0.7003687244052177, "learning_rate": 9.245165196412558e-06, "loss": 0.5389, "step": 3169 }, { "epoch": 0.4, "grad_norm": 1.158307663065942, "learning_rate": 9.244620078056281e-06, "loss": 0.5646, "step": 3170 }, { "epoch": 0.4, "grad_norm": 0.7284331813544216, "learning_rate": 9.244074779020088e-06, "loss": 0.6357, "step": 3171 }, { "epoch": 0.4, "grad_norm": 0.6903426199710162, "learning_rate": 9.243529299327198e-06, "loss": 0.5638, "step": 3172 }, { "epoch": 0.4, "grad_norm": 0.6913996326403633, "learning_rate": 9.242983639000825e-06, "loss": 0.6357, "step": 3173 }, { "epoch": 0.4, "grad_norm": 0.8558684031398051, "learning_rate": 9.242437798064197e-06, "loss": 0.5699, "step": 3174 }, { "epoch": 0.4, "grad_norm": 0.6233453702160608, "learning_rate": 9.241891776540549e-06, "loss": 0.562, "step": 3175 }, { "epoch": 0.4, "grad_norm": 0.7623320687446391, "learning_rate": 9.241345574453122e-06, "loss": 0.6822, "step": 3176 }, { "epoch": 0.4, "grad_norm": 0.8212404104712616, "learning_rate": 9.240799191825167e-06, "loss": 0.6634, "step": 3177 }, { "epoch": 0.4, "grad_norm": 0.6892695539055483, "learning_rate": 9.240252628679945e-06, "loss": 0.5825, "step": 3178 }, { "epoch": 0.4, "grad_norm": 0.7474256591434516, "learning_rate": 9.239705885040716e-06, "loss": 0.5885, "step": 3179 }, { "epoch": 0.41, "grad_norm": 0.701528282681911, "learning_rate": 9.239158960930757e-06, "loss": 0.5961, "step": 3180 }, { "epoch": 0.41, "grad_norm": 0.614972457800695, "learning_rate": 9.238611856373346e-06, "loss": 0.5648, "step": 3181 }, { "epoch": 0.41, "grad_norm": 0.7016096696878144, "learning_rate": 9.238064571391772e-06, "loss": 0.5871, "step": 3182 }, { "epoch": 0.41, "grad_norm": 0.6065021393621388, "learning_rate": 9.237517106009335e-06, "loss": 0.5456, "step": 3183 }, { "epoch": 0.41, "grad_norm": 0.6165816396422367, "learning_rate": 9.236969460249335e-06, "loss": 0.4853, "step": 3184 }, { "epoch": 0.41, "grad_norm": 0.9406136898950629, "learning_rate": 9.236421634135085e-06, "loss": 0.6237, "step": 3185 }, { "epoch": 0.41, "grad_norm": 0.7294982034530778, "learning_rate": 9.235873627689902e-06, "loss": 0.5752, "step": 3186 }, { "epoch": 0.41, "grad_norm": 0.6475248770643098, "learning_rate": 9.235325440937115e-06, "loss": 0.5316, "step": 3187 }, { "epoch": 0.41, "grad_norm": 0.8161288507107136, "learning_rate": 9.234777073900059e-06, "loss": 0.6273, "step": 3188 }, { "epoch": 0.41, "grad_norm": 0.9524174405826741, "learning_rate": 9.234228526602074e-06, "loss": 0.6585, "step": 3189 }, { "epoch": 0.41, "grad_norm": 0.7622085834034301, "learning_rate": 9.233679799066514e-06, "loss": 0.5887, "step": 3190 }, { "epoch": 0.41, "grad_norm": 0.8001198885334686, "learning_rate": 9.233130891316731e-06, "loss": 0.6415, "step": 3191 }, { "epoch": 0.41, "grad_norm": 0.7299578815119844, "learning_rate": 9.232581803376095e-06, "loss": 0.6307, "step": 3192 }, { "epoch": 0.41, "grad_norm": 0.7906144659315226, "learning_rate": 9.232032535267976e-06, "loss": 0.6024, "step": 3193 }, { "epoch": 0.41, "grad_norm": 0.7027847323877751, "learning_rate": 9.231483087015755e-06, "loss": 0.6417, "step": 3194 }, { "epoch": 0.41, "grad_norm": 0.8302817529568468, "learning_rate": 9.230933458642823e-06, "loss": 0.562, "step": 3195 }, { "epoch": 0.41, "grad_norm": 0.8739537638604074, "learning_rate": 9.230383650172572e-06, "loss": 0.6126, "step": 3196 }, { "epoch": 0.41, "grad_norm": 0.5171971506646962, "learning_rate": 9.229833661628409e-06, "loss": 0.4514, "step": 3197 }, { "epoch": 0.41, "grad_norm": 0.7834864512856085, "learning_rate": 9.229283493033742e-06, "loss": 0.621, "step": 3198 }, { "epoch": 0.41, "grad_norm": 0.6275152321814564, "learning_rate": 9.228733144411993e-06, "loss": 0.5007, "step": 3199 }, { "epoch": 0.41, "grad_norm": 1.1290054452156002, "learning_rate": 9.228182615786585e-06, "loss": 0.6702, "step": 3200 }, { "epoch": 0.41, "grad_norm": 0.8395437716995005, "learning_rate": 9.227631907180956e-06, "loss": 0.5648, "step": 3201 }, { "epoch": 0.41, "grad_norm": 0.7740440337826754, "learning_rate": 9.227081018618546e-06, "loss": 0.6478, "step": 3202 }, { "epoch": 0.41, "grad_norm": 0.7305535433094422, "learning_rate": 9.226529950122805e-06, "loss": 0.5504, "step": 3203 }, { "epoch": 0.41, "grad_norm": 0.5646711821983378, "learning_rate": 9.225978701717189e-06, "loss": 0.5423, "step": 3204 }, { "epoch": 0.41, "grad_norm": 0.6183282577703919, "learning_rate": 9.225427273425165e-06, "loss": 0.5615, "step": 3205 }, { "epoch": 0.41, "grad_norm": 0.6229665852162618, "learning_rate": 9.224875665270206e-06, "loss": 0.5171, "step": 3206 }, { "epoch": 0.41, "grad_norm": 0.5431499851299624, "learning_rate": 9.224323877275786e-06, "loss": 0.5199, "step": 3207 }, { "epoch": 0.41, "grad_norm": 0.6601271472318798, "learning_rate": 9.223771909465402e-06, "loss": 0.5443, "step": 3208 }, { "epoch": 0.41, "grad_norm": 0.6404625223145297, "learning_rate": 9.223219761862545e-06, "loss": 0.5509, "step": 3209 }, { "epoch": 0.41, "grad_norm": 1.7037978654532602, "learning_rate": 9.222667434490716e-06, "loss": 0.6473, "step": 3210 }, { "epoch": 0.41, "grad_norm": 0.721589651611027, "learning_rate": 9.222114927373429e-06, "loss": 0.5679, "step": 3211 }, { "epoch": 0.41, "grad_norm": 0.6568083250973575, "learning_rate": 9.2215622405342e-06, "loss": 0.5928, "step": 3212 }, { "epoch": 0.41, "grad_norm": 0.7164926491186786, "learning_rate": 9.22100937399656e-06, "loss": 0.6043, "step": 3213 }, { "epoch": 0.41, "grad_norm": 0.8310340783994664, "learning_rate": 9.220456327784038e-06, "loss": 0.6041, "step": 3214 }, { "epoch": 0.41, "grad_norm": 0.7426404072208325, "learning_rate": 9.219903101920176e-06, "loss": 0.6161, "step": 3215 }, { "epoch": 0.41, "grad_norm": 0.8143939296303716, "learning_rate": 9.219349696428523e-06, "loss": 0.6158, "step": 3216 }, { "epoch": 0.41, "grad_norm": 0.7328724560042611, "learning_rate": 9.218796111332638e-06, "loss": 0.5665, "step": 3217 }, { "epoch": 0.41, "grad_norm": 1.014845447845572, "learning_rate": 9.218242346656083e-06, "loss": 0.625, "step": 3218 }, { "epoch": 0.41, "grad_norm": 0.9705993139030301, "learning_rate": 9.217688402422431e-06, "loss": 0.6891, "step": 3219 }, { "epoch": 0.41, "grad_norm": 0.7499917739142293, "learning_rate": 9.217134278655262e-06, "loss": 0.5755, "step": 3220 }, { "epoch": 0.41, "grad_norm": 0.7125004999350245, "learning_rate": 9.216579975378163e-06, "loss": 0.5191, "step": 3221 }, { "epoch": 0.41, "grad_norm": 0.854810055587463, "learning_rate": 9.21602549261473e-06, "loss": 0.6089, "step": 3222 }, { "epoch": 0.41, "grad_norm": 0.5691407539342304, "learning_rate": 9.215470830388562e-06, "loss": 0.5109, "step": 3223 }, { "epoch": 0.41, "grad_norm": 0.6172279238982875, "learning_rate": 9.21491598872327e-06, "loss": 0.5294, "step": 3224 }, { "epoch": 0.41, "grad_norm": 0.7981602876094932, "learning_rate": 9.214360967642477e-06, "loss": 0.6234, "step": 3225 }, { "epoch": 0.41, "grad_norm": 0.6538744727864495, "learning_rate": 9.213805767169802e-06, "loss": 0.5342, "step": 3226 }, { "epoch": 0.41, "grad_norm": 0.7244812345654913, "learning_rate": 9.213250387328882e-06, "loss": 0.5867, "step": 3227 }, { "epoch": 0.41, "grad_norm": 0.7182566480602726, "learning_rate": 9.212694828143356e-06, "loss": 0.5795, "step": 3228 }, { "epoch": 0.41, "grad_norm": 0.8816372560725132, "learning_rate": 9.212139089636875e-06, "loss": 0.627, "step": 3229 }, { "epoch": 0.41, "grad_norm": 0.5951494522354634, "learning_rate": 9.211583171833092e-06, "loss": 0.5791, "step": 3230 }, { "epoch": 0.41, "grad_norm": 0.6931626072454565, "learning_rate": 9.211027074755671e-06, "loss": 0.5897, "step": 3231 }, { "epoch": 0.41, "grad_norm": 0.6833677451982374, "learning_rate": 9.210470798428283e-06, "loss": 0.5215, "step": 3232 }, { "epoch": 0.41, "grad_norm": 1.282989705730721, "learning_rate": 9.20991434287461e-06, "loss": 0.6345, "step": 3233 }, { "epoch": 0.41, "grad_norm": 0.5855130011417814, "learning_rate": 9.209357708118335e-06, "loss": 0.5138, "step": 3234 }, { "epoch": 0.41, "grad_norm": 0.596196516944751, "learning_rate": 9.208800894183154e-06, "loss": 0.5724, "step": 3235 }, { "epoch": 0.41, "grad_norm": 0.7480587414485678, "learning_rate": 9.208243901092769e-06, "loss": 0.6149, "step": 3236 }, { "epoch": 0.41, "grad_norm": 0.8518895744305718, "learning_rate": 9.207686728870889e-06, "loss": 0.683, "step": 3237 }, { "epoch": 0.41, "grad_norm": 0.892959194286907, "learning_rate": 9.207129377541229e-06, "loss": 0.6175, "step": 3238 }, { "epoch": 0.41, "grad_norm": 0.5810982847496905, "learning_rate": 9.206571847127517e-06, "loss": 0.5218, "step": 3239 }, { "epoch": 0.41, "grad_norm": 0.8535470654985331, "learning_rate": 9.206014137653482e-06, "loss": 0.6381, "step": 3240 }, { "epoch": 0.41, "grad_norm": 0.633664919505574, "learning_rate": 9.205456249142868e-06, "loss": 0.555, "step": 3241 }, { "epoch": 0.41, "grad_norm": 0.8160371523072273, "learning_rate": 9.204898181619419e-06, "loss": 0.5539, "step": 3242 }, { "epoch": 0.41, "grad_norm": 0.7079567478379215, "learning_rate": 9.204339935106892e-06, "loss": 0.599, "step": 3243 }, { "epoch": 0.41, "grad_norm": 0.7615625775965033, "learning_rate": 9.203781509629048e-06, "loss": 0.5952, "step": 3244 }, { "epoch": 0.41, "grad_norm": 0.7544846974124569, "learning_rate": 9.203222905209659e-06, "loss": 0.6333, "step": 3245 }, { "epoch": 0.41, "grad_norm": 0.6298341210044557, "learning_rate": 9.202664121872502e-06, "loss": 0.5374, "step": 3246 }, { "epoch": 0.41, "grad_norm": 0.5971652832986465, "learning_rate": 9.202105159641363e-06, "loss": 0.5817, "step": 3247 }, { "epoch": 0.41, "grad_norm": 0.7216286265188634, "learning_rate": 9.201546018540037e-06, "loss": 0.509, "step": 3248 }, { "epoch": 0.41, "grad_norm": 0.8146484420358, "learning_rate": 9.200986698592323e-06, "loss": 0.6646, "step": 3249 }, { "epoch": 0.41, "grad_norm": 0.6470741855321337, "learning_rate": 9.200427199822028e-06, "loss": 0.5547, "step": 3250 }, { "epoch": 0.41, "grad_norm": 0.5307516565538946, "learning_rate": 9.19986752225297e-06, "loss": 0.4952, "step": 3251 }, { "epoch": 0.41, "grad_norm": 0.7499313470267501, "learning_rate": 9.199307665908974e-06, "loss": 0.6223, "step": 3252 }, { "epoch": 0.41, "grad_norm": 0.7873693261187309, "learning_rate": 9.19874763081387e-06, "loss": 0.5919, "step": 3253 }, { "epoch": 0.41, "grad_norm": 0.8312459521266437, "learning_rate": 9.198187416991493e-06, "loss": 0.5581, "step": 3254 }, { "epoch": 0.41, "grad_norm": 0.6218252502503935, "learning_rate": 9.197627024465697e-06, "loss": 0.5271, "step": 3255 }, { "epoch": 0.41, "grad_norm": 0.6793659870096235, "learning_rate": 9.197066453260331e-06, "loss": 0.5878, "step": 3256 }, { "epoch": 0.41, "grad_norm": 0.8097458712161544, "learning_rate": 9.196505703399257e-06, "loss": 0.6745, "step": 3257 }, { "epoch": 0.42, "grad_norm": 0.8928733149820136, "learning_rate": 9.195944774906347e-06, "loss": 0.6213, "step": 3258 }, { "epoch": 0.42, "grad_norm": 0.6472409547449489, "learning_rate": 9.195383667805475e-06, "loss": 0.5767, "step": 3259 }, { "epoch": 0.42, "grad_norm": 0.6662930157629626, "learning_rate": 9.194822382120527e-06, "loss": 0.5626, "step": 3260 }, { "epoch": 0.42, "grad_norm": 0.9905387848773961, "learning_rate": 9.194260917875395e-06, "loss": 0.635, "step": 3261 }, { "epoch": 0.42, "grad_norm": 0.6018491781279309, "learning_rate": 9.193699275093979e-06, "loss": 0.5283, "step": 3262 }, { "epoch": 0.42, "grad_norm": 0.7426946739996282, "learning_rate": 9.193137453800185e-06, "loss": 0.5722, "step": 3263 }, { "epoch": 0.42, "grad_norm": 0.5828078313417627, "learning_rate": 9.19257545401793e-06, "loss": 0.5268, "step": 3264 }, { "epoch": 0.42, "grad_norm": 0.9124170137727117, "learning_rate": 9.192013275771136e-06, "loss": 0.6762, "step": 3265 }, { "epoch": 0.42, "grad_norm": 0.7039819945593421, "learning_rate": 9.191450919083731e-06, "loss": 0.5918, "step": 3266 }, { "epoch": 0.42, "grad_norm": 0.7032769511281745, "learning_rate": 9.190888383979655e-06, "loss": 0.5774, "step": 3267 }, { "epoch": 0.42, "grad_norm": 0.7520639233199657, "learning_rate": 9.190325670482854e-06, "loss": 0.6654, "step": 3268 }, { "epoch": 0.42, "grad_norm": 0.6459007792468826, "learning_rate": 9.189762778617277e-06, "loss": 0.552, "step": 3269 }, { "epoch": 0.42, "grad_norm": 0.7001160285666483, "learning_rate": 9.189199708406889e-06, "loss": 0.5639, "step": 3270 }, { "epoch": 0.42, "grad_norm": 0.6140801763175019, "learning_rate": 9.188636459875655e-06, "loss": 0.5589, "step": 3271 }, { "epoch": 0.42, "grad_norm": 1.153917460766353, "learning_rate": 9.188073033047553e-06, "loss": 0.6643, "step": 3272 }, { "epoch": 0.42, "grad_norm": 0.7246733619528902, "learning_rate": 9.187509427946565e-06, "loss": 0.5491, "step": 3273 }, { "epoch": 0.42, "grad_norm": 0.5658520188236634, "learning_rate": 9.18694564459668e-06, "loss": 0.5345, "step": 3274 }, { "epoch": 0.42, "grad_norm": 0.8130835488751036, "learning_rate": 9.1863816830219e-06, "loss": 0.5984, "step": 3275 }, { "epoch": 0.42, "grad_norm": 0.5737981169444385, "learning_rate": 9.185817543246232e-06, "loss": 0.5141, "step": 3276 }, { "epoch": 0.42, "grad_norm": 0.6484394591916782, "learning_rate": 9.185253225293684e-06, "loss": 0.574, "step": 3277 }, { "epoch": 0.42, "grad_norm": 0.8387932616442788, "learning_rate": 9.184688729188281e-06, "loss": 0.5799, "step": 3278 }, { "epoch": 0.42, "grad_norm": 0.6059481311429133, "learning_rate": 9.18412405495405e-06, "loss": 0.4868, "step": 3279 }, { "epoch": 0.42, "grad_norm": 0.5912994012192516, "learning_rate": 9.18355920261503e-06, "loss": 0.5601, "step": 3280 }, { "epoch": 0.42, "grad_norm": 0.6053788989091289, "learning_rate": 9.182994172195266e-06, "loss": 0.5473, "step": 3281 }, { "epoch": 0.42, "grad_norm": 0.6659146771005405, "learning_rate": 9.182428963718804e-06, "loss": 0.5679, "step": 3282 }, { "epoch": 0.42, "grad_norm": 0.8035257479160005, "learning_rate": 9.181863577209705e-06, "loss": 0.6232, "step": 3283 }, { "epoch": 0.42, "grad_norm": 0.8270559144867194, "learning_rate": 9.181298012692039e-06, "loss": 0.6617, "step": 3284 }, { "epoch": 0.42, "grad_norm": 0.731995430929404, "learning_rate": 9.180732270189877e-06, "loss": 0.5651, "step": 3285 }, { "epoch": 0.42, "grad_norm": 0.6317926449243199, "learning_rate": 9.180166349727303e-06, "loss": 0.542, "step": 3286 }, { "epoch": 0.42, "grad_norm": 0.74161766044179, "learning_rate": 9.179600251328405e-06, "loss": 0.6183, "step": 3287 }, { "epoch": 0.42, "grad_norm": 0.6565143061279405, "learning_rate": 9.17903397501728e-06, "loss": 0.5836, "step": 3288 }, { "epoch": 0.42, "grad_norm": 0.9126999944088788, "learning_rate": 9.178467520818033e-06, "loss": 0.6003, "step": 3289 }, { "epoch": 0.42, "grad_norm": 0.8470934289346624, "learning_rate": 9.177900888754775e-06, "loss": 0.6555, "step": 3290 }, { "epoch": 0.42, "grad_norm": 0.6351474911842814, "learning_rate": 9.17733407885163e-06, "loss": 0.5221, "step": 3291 }, { "epoch": 0.42, "grad_norm": 0.6176272144988341, "learning_rate": 9.176767091132718e-06, "loss": 0.5505, "step": 3292 }, { "epoch": 0.42, "grad_norm": 0.5704415893873597, "learning_rate": 9.176199925622183e-06, "loss": 0.5121, "step": 3293 }, { "epoch": 0.42, "grad_norm": 0.6747566530757642, "learning_rate": 9.175632582344158e-06, "loss": 0.4967, "step": 3294 }, { "epoch": 0.42, "grad_norm": 0.7974450169225238, "learning_rate": 9.175065061322799e-06, "loss": 0.6486, "step": 3295 }, { "epoch": 0.42, "grad_norm": 0.5971550840646895, "learning_rate": 9.174497362582262e-06, "loss": 0.5224, "step": 3296 }, { "epoch": 0.42, "grad_norm": 0.8160735889062235, "learning_rate": 9.173929486146713e-06, "loss": 0.637, "step": 3297 }, { "epoch": 0.42, "grad_norm": 0.7745251199837998, "learning_rate": 9.173361432040321e-06, "loss": 0.6084, "step": 3298 }, { "epoch": 0.42, "grad_norm": 0.6976358887644472, "learning_rate": 9.172793200287272e-06, "loss": 0.5466, "step": 3299 }, { "epoch": 0.42, "grad_norm": 0.64552829772451, "learning_rate": 9.172224790911748e-06, "loss": 0.5751, "step": 3300 }, { "epoch": 0.42, "grad_norm": 0.6585268850253904, "learning_rate": 9.17165620393795e-06, "loss": 0.6338, "step": 3301 }, { "epoch": 0.42, "grad_norm": 0.6721880760623039, "learning_rate": 9.171087439390076e-06, "loss": 0.5549, "step": 3302 }, { "epoch": 0.42, "grad_norm": 0.5779134645775404, "learning_rate": 9.17051849729234e-06, "loss": 0.4685, "step": 3303 }, { "epoch": 0.42, "grad_norm": 0.9383967938567331, "learning_rate": 9.169949377668958e-06, "loss": 0.7189, "step": 3304 }, { "epoch": 0.42, "grad_norm": 0.618739152947225, "learning_rate": 9.169380080544157e-06, "loss": 0.5339, "step": 3305 }, { "epoch": 0.42, "grad_norm": 0.6074685136042486, "learning_rate": 9.168810605942168e-06, "loss": 0.5385, "step": 3306 }, { "epoch": 0.42, "grad_norm": 2.3964853215673774, "learning_rate": 9.168240953887234e-06, "loss": 0.6239, "step": 3307 }, { "epoch": 0.42, "grad_norm": 0.8854781551920204, "learning_rate": 9.167671124403601e-06, "loss": 0.6395, "step": 3308 }, { "epoch": 0.42, "grad_norm": 0.6341463647232668, "learning_rate": 9.167101117515528e-06, "loss": 0.5084, "step": 3309 }, { "epoch": 0.42, "grad_norm": 0.6204243305855078, "learning_rate": 9.166530933247276e-06, "loss": 0.4837, "step": 3310 }, { "epoch": 0.42, "grad_norm": 0.8782656556572855, "learning_rate": 9.165960571623116e-06, "loss": 0.6147, "step": 3311 }, { "epoch": 0.42, "grad_norm": 0.5921902972365098, "learning_rate": 9.165390032667328e-06, "loss": 0.5314, "step": 3312 }, { "epoch": 0.42, "grad_norm": 0.6168798103946963, "learning_rate": 9.164819316404197e-06, "loss": 0.549, "step": 3313 }, { "epoch": 0.42, "grad_norm": 0.6546515366806996, "learning_rate": 9.164248422858015e-06, "loss": 0.5533, "step": 3314 }, { "epoch": 0.42, "grad_norm": 0.527765102681221, "learning_rate": 9.163677352053087e-06, "loss": 0.4784, "step": 3315 }, { "epoch": 0.42, "grad_norm": 0.5243501757772494, "learning_rate": 9.163106104013717e-06, "loss": 0.5187, "step": 3316 }, { "epoch": 0.42, "grad_norm": 0.8512706500980287, "learning_rate": 9.162534678764225e-06, "loss": 0.6358, "step": 3317 }, { "epoch": 0.42, "grad_norm": 0.801562613911495, "learning_rate": 9.161963076328933e-06, "loss": 0.6723, "step": 3318 }, { "epoch": 0.42, "grad_norm": 0.9063120931367663, "learning_rate": 9.161391296732171e-06, "loss": 0.6395, "step": 3319 }, { "epoch": 0.42, "grad_norm": 0.6613677803151039, "learning_rate": 9.160819339998282e-06, "loss": 0.5656, "step": 3320 }, { "epoch": 0.42, "grad_norm": 0.8788521624452991, "learning_rate": 9.160247206151608e-06, "loss": 0.6328, "step": 3321 }, { "epoch": 0.42, "grad_norm": 0.9102758162586393, "learning_rate": 9.159674895216507e-06, "loss": 0.6288, "step": 3322 }, { "epoch": 0.42, "grad_norm": 0.6292348531350221, "learning_rate": 9.159102407217334e-06, "loss": 0.5138, "step": 3323 }, { "epoch": 0.42, "grad_norm": 0.6926993439377039, "learning_rate": 9.158529742178466e-06, "loss": 0.5795, "step": 3324 }, { "epoch": 0.42, "grad_norm": 0.8196574769217888, "learning_rate": 9.157956900124273e-06, "loss": 0.6138, "step": 3325 }, { "epoch": 0.42, "grad_norm": 0.6584602786355435, "learning_rate": 9.157383881079143e-06, "loss": 0.5792, "step": 3326 }, { "epoch": 0.42, "grad_norm": 1.0104669688959764, "learning_rate": 9.156810685067467e-06, "loss": 0.6747, "step": 3327 }, { "epoch": 0.42, "grad_norm": 0.6532782793076529, "learning_rate": 9.156237312113642e-06, "loss": 0.5303, "step": 3328 }, { "epoch": 0.42, "grad_norm": 0.8287066372914469, "learning_rate": 9.155663762242076e-06, "loss": 0.6336, "step": 3329 }, { "epoch": 0.42, "grad_norm": 0.6742819142673648, "learning_rate": 9.155090035477185e-06, "loss": 0.579, "step": 3330 }, { "epoch": 0.42, "grad_norm": 0.953562904369008, "learning_rate": 9.154516131843388e-06, "loss": 0.6716, "step": 3331 }, { "epoch": 0.42, "grad_norm": 0.6873147643866758, "learning_rate": 9.153942051365115e-06, "loss": 0.5199, "step": 3332 }, { "epoch": 0.42, "grad_norm": 0.9029028758271641, "learning_rate": 9.153367794066802e-06, "loss": 0.5479, "step": 3333 }, { "epoch": 0.42, "grad_norm": 0.6772567790207706, "learning_rate": 9.152793359972894e-06, "loss": 0.5512, "step": 3334 }, { "epoch": 0.42, "grad_norm": 0.846653051375728, "learning_rate": 9.152218749107845e-06, "loss": 0.6476, "step": 3335 }, { "epoch": 0.42, "grad_norm": 0.624086643969705, "learning_rate": 9.151643961496111e-06, "loss": 0.575, "step": 3336 }, { "epoch": 0.43, "grad_norm": 0.9148599476333431, "learning_rate": 9.151068997162163e-06, "loss": 0.6742, "step": 3337 }, { "epoch": 0.43, "grad_norm": 0.5664197078715828, "learning_rate": 9.15049385613047e-06, "loss": 0.5124, "step": 3338 }, { "epoch": 0.43, "grad_norm": 0.6044488824927788, "learning_rate": 9.149918538425517e-06, "loss": 0.5296, "step": 3339 }, { "epoch": 0.43, "grad_norm": 0.5886124520905012, "learning_rate": 9.149343044071793e-06, "loss": 0.5609, "step": 3340 }, { "epoch": 0.43, "grad_norm": 0.5699769180161598, "learning_rate": 9.148767373093796e-06, "loss": 0.5536, "step": 3341 }, { "epoch": 0.43, "grad_norm": 0.5924989604629504, "learning_rate": 9.148191525516027e-06, "loss": 0.5398, "step": 3342 }, { "epoch": 0.43, "grad_norm": 0.7687488774906683, "learning_rate": 9.147615501363004e-06, "loss": 0.5943, "step": 3343 }, { "epoch": 0.43, "grad_norm": 0.5523382658679526, "learning_rate": 9.147039300659241e-06, "loss": 0.5572, "step": 3344 }, { "epoch": 0.43, "grad_norm": 0.7726880357411495, "learning_rate": 9.146462923429269e-06, "loss": 0.6564, "step": 3345 }, { "epoch": 0.43, "grad_norm": 0.5975929958069081, "learning_rate": 9.145886369697618e-06, "loss": 0.5452, "step": 3346 }, { "epoch": 0.43, "grad_norm": 0.7360591699213976, "learning_rate": 9.145309639488835e-06, "loss": 0.6373, "step": 3347 }, { "epoch": 0.43, "grad_norm": 0.5683055490433099, "learning_rate": 9.144732732827464e-06, "loss": 0.5132, "step": 3348 }, { "epoch": 0.43, "grad_norm": 0.8434869696891951, "learning_rate": 9.144155649738067e-06, "loss": 0.6544, "step": 3349 }, { "epoch": 0.43, "grad_norm": 0.6122219318901587, "learning_rate": 9.143578390245207e-06, "loss": 0.5217, "step": 3350 }, { "epoch": 0.43, "grad_norm": 0.9442240314101615, "learning_rate": 9.143000954373455e-06, "loss": 0.619, "step": 3351 }, { "epoch": 0.43, "grad_norm": 0.8609016654664363, "learning_rate": 9.142423342147392e-06, "loss": 0.6901, "step": 3352 }, { "epoch": 0.43, "grad_norm": 0.8100428882519414, "learning_rate": 9.141845553591606e-06, "loss": 0.6494, "step": 3353 }, { "epoch": 0.43, "grad_norm": 0.761576052846754, "learning_rate": 9.141267588730689e-06, "loss": 0.6327, "step": 3354 }, { "epoch": 0.43, "grad_norm": 0.6165467173058305, "learning_rate": 9.140689447589245e-06, "loss": 0.542, "step": 3355 }, { "epoch": 0.43, "grad_norm": 0.5876969642271552, "learning_rate": 9.140111130191882e-06, "loss": 0.5399, "step": 3356 }, { "epoch": 0.43, "grad_norm": 0.6287577389873411, "learning_rate": 9.139532636563219e-06, "loss": 0.5657, "step": 3357 }, { "epoch": 0.43, "grad_norm": 0.8379049445511668, "learning_rate": 9.13895396672788e-06, "loss": 0.5946, "step": 3358 }, { "epoch": 0.43, "grad_norm": 0.5379736932317782, "learning_rate": 9.138375120710495e-06, "loss": 0.5038, "step": 3359 }, { "epoch": 0.43, "grad_norm": 0.8809318084173557, "learning_rate": 9.137796098535707e-06, "loss": 0.6579, "step": 3360 }, { "epoch": 0.43, "grad_norm": 0.6072689367456056, "learning_rate": 9.137216900228161e-06, "loss": 0.5149, "step": 3361 }, { "epoch": 0.43, "grad_norm": 0.6422148248983705, "learning_rate": 9.13663752581251e-06, "loss": 0.5391, "step": 3362 }, { "epoch": 0.43, "grad_norm": 0.6986428452209693, "learning_rate": 9.136057975313422e-06, "loss": 0.5689, "step": 3363 }, { "epoch": 0.43, "grad_norm": 0.5706498515380872, "learning_rate": 9.135478248755561e-06, "loss": 0.5233, "step": 3364 }, { "epoch": 0.43, "grad_norm": 0.6477492814285084, "learning_rate": 9.134898346163608e-06, "loss": 0.5261, "step": 3365 }, { "epoch": 0.43, "grad_norm": 0.5451511723075446, "learning_rate": 9.134318267562244e-06, "loss": 0.4821, "step": 3366 }, { "epoch": 0.43, "grad_norm": 1.0732824412215014, "learning_rate": 9.133738012976163e-06, "loss": 0.659, "step": 3367 }, { "epoch": 0.43, "grad_norm": 0.6449010183592399, "learning_rate": 9.133157582430063e-06, "loss": 0.5152, "step": 3368 }, { "epoch": 0.43, "grad_norm": 0.8101058622650507, "learning_rate": 9.132576975948655e-06, "loss": 0.5877, "step": 3369 }, { "epoch": 0.43, "grad_norm": 0.7507506869939086, "learning_rate": 9.13199619355665e-06, "loss": 0.5631, "step": 3370 }, { "epoch": 0.43, "grad_norm": 0.845321121448081, "learning_rate": 9.13141523527877e-06, "loss": 0.6395, "step": 3371 }, { "epoch": 0.43, "grad_norm": 0.7082437538968397, "learning_rate": 9.130834101139746e-06, "loss": 0.5613, "step": 3372 }, { "epoch": 0.43, "grad_norm": 0.9805342219127094, "learning_rate": 9.130252791164316e-06, "loss": 0.6344, "step": 3373 }, { "epoch": 0.43, "grad_norm": 0.7900535866295054, "learning_rate": 9.129671305377221e-06, "loss": 0.6313, "step": 3374 }, { "epoch": 0.43, "grad_norm": 0.6552233602094087, "learning_rate": 9.129089643803214e-06, "loss": 0.629, "step": 3375 }, { "epoch": 0.43, "grad_norm": 0.6125297194096332, "learning_rate": 9.12850780646706e-06, "loss": 0.5558, "step": 3376 }, { "epoch": 0.43, "grad_norm": 0.7039150157695548, "learning_rate": 9.127925793393518e-06, "loss": 0.5445, "step": 3377 }, { "epoch": 0.43, "grad_norm": 0.5808338395472502, "learning_rate": 9.127343604607368e-06, "loss": 0.5222, "step": 3378 }, { "epoch": 0.43, "grad_norm": 0.6737747923260493, "learning_rate": 9.126761240133389e-06, "loss": 0.5396, "step": 3379 }, { "epoch": 0.43, "grad_norm": 0.7678252240305559, "learning_rate": 9.126178699996372e-06, "loss": 0.6594, "step": 3380 }, { "epoch": 0.43, "grad_norm": 0.6479400350903531, "learning_rate": 9.125595984221111e-06, "loss": 0.5199, "step": 3381 }, { "epoch": 0.43, "grad_norm": 0.9702338280115503, "learning_rate": 9.125013092832413e-06, "loss": 0.6508, "step": 3382 }, { "epoch": 0.43, "grad_norm": 0.7154203402310482, "learning_rate": 9.124430025855092e-06, "loss": 0.6366, "step": 3383 }, { "epoch": 0.43, "grad_norm": 1.0165729780864998, "learning_rate": 9.123846783313962e-06, "loss": 0.6224, "step": 3384 }, { "epoch": 0.43, "grad_norm": 0.5766769383579708, "learning_rate": 9.123263365233853e-06, "loss": 0.4986, "step": 3385 }, { "epoch": 0.43, "grad_norm": 0.6297472786360359, "learning_rate": 9.122679771639597e-06, "loss": 0.5538, "step": 3386 }, { "epoch": 0.43, "grad_norm": 0.5713774395211625, "learning_rate": 9.12209600255604e-06, "loss": 0.4997, "step": 3387 }, { "epoch": 0.43, "grad_norm": 0.7399484539715754, "learning_rate": 9.121512058008027e-06, "loss": 0.5827, "step": 3388 }, { "epoch": 0.43, "grad_norm": 0.8690171513296493, "learning_rate": 9.120927938020417e-06, "loss": 0.5909, "step": 3389 }, { "epoch": 0.43, "grad_norm": 0.6314486785053257, "learning_rate": 9.120343642618072e-06, "loss": 0.556, "step": 3390 }, { "epoch": 0.43, "grad_norm": 0.8256157442624091, "learning_rate": 9.119759171825864e-06, "loss": 0.5406, "step": 3391 }, { "epoch": 0.43, "grad_norm": 1.3008687447765541, "learning_rate": 9.119174525668675e-06, "loss": 0.6182, "step": 3392 }, { "epoch": 0.43, "grad_norm": 0.6699956479986262, "learning_rate": 9.11858970417139e-06, "loss": 0.5853, "step": 3393 }, { "epoch": 0.43, "grad_norm": 0.745843427690736, "learning_rate": 9.1180047073589e-06, "loss": 0.5609, "step": 3394 }, { "epoch": 0.43, "grad_norm": 0.8526497815180284, "learning_rate": 9.117419535256112e-06, "loss": 0.6744, "step": 3395 }, { "epoch": 0.43, "grad_norm": 0.8985836024702388, "learning_rate": 9.116834187887929e-06, "loss": 0.5944, "step": 3396 }, { "epoch": 0.43, "grad_norm": 0.7569995747681864, "learning_rate": 9.116248665279271e-06, "loss": 0.5935, "step": 3397 }, { "epoch": 0.43, "grad_norm": 0.8722201886654226, "learning_rate": 9.115662967455062e-06, "loss": 0.6522, "step": 3398 }, { "epoch": 0.43, "grad_norm": 0.54464972485816, "learning_rate": 9.11507709444023e-06, "loss": 0.4958, "step": 3399 }, { "epoch": 0.43, "grad_norm": 0.9578844103435927, "learning_rate": 9.11449104625972e-06, "loss": 0.6634, "step": 3400 }, { "epoch": 0.43, "grad_norm": 0.6305311560784173, "learning_rate": 9.11390482293847e-06, "loss": 0.5234, "step": 3401 }, { "epoch": 0.43, "grad_norm": 0.8473449763833505, "learning_rate": 9.113318424501441e-06, "loss": 0.6717, "step": 3402 }, { "epoch": 0.43, "grad_norm": 0.6465775105355057, "learning_rate": 9.112731850973588e-06, "loss": 0.5333, "step": 3403 }, { "epoch": 0.43, "grad_norm": 0.7232518515987104, "learning_rate": 9.112145102379883e-06, "loss": 0.6084, "step": 3404 }, { "epoch": 0.43, "grad_norm": 0.859278182864312, "learning_rate": 9.111558178745303e-06, "loss": 0.589, "step": 3405 }, { "epoch": 0.43, "grad_norm": 1.0453940997207696, "learning_rate": 9.110971080094831e-06, "loss": 0.6317, "step": 3406 }, { "epoch": 0.43, "grad_norm": 0.6580486255596497, "learning_rate": 9.110383806453454e-06, "loss": 0.531, "step": 3407 }, { "epoch": 0.43, "grad_norm": 0.7649491544022339, "learning_rate": 9.109796357846175e-06, "loss": 0.648, "step": 3408 }, { "epoch": 0.43, "grad_norm": 0.759930269148066, "learning_rate": 9.109208734297998e-06, "loss": 0.6209, "step": 3409 }, { "epoch": 0.43, "grad_norm": 0.7872410564615248, "learning_rate": 9.108620935833936e-06, "loss": 0.5486, "step": 3410 }, { "epoch": 0.43, "grad_norm": 0.9132832582965659, "learning_rate": 9.10803296247901e-06, "loss": 0.6446, "step": 3411 }, { "epoch": 0.43, "grad_norm": 0.8071141709769595, "learning_rate": 9.107444814258248e-06, "loss": 0.6386, "step": 3412 }, { "epoch": 0.43, "grad_norm": 0.6190393288077055, "learning_rate": 9.106856491196687e-06, "loss": 0.5587, "step": 3413 }, { "epoch": 0.43, "grad_norm": 0.687739849457782, "learning_rate": 9.106267993319367e-06, "loss": 0.5009, "step": 3414 }, { "epoch": 0.44, "grad_norm": 0.7321852620574655, "learning_rate": 9.105679320651342e-06, "loss": 0.5614, "step": 3415 }, { "epoch": 0.44, "grad_norm": 0.795381282193837, "learning_rate": 9.105090473217666e-06, "loss": 0.5831, "step": 3416 }, { "epoch": 0.44, "grad_norm": 0.5307703594494323, "learning_rate": 9.104501451043408e-06, "loss": 0.5021, "step": 3417 }, { "epoch": 0.44, "grad_norm": 0.9021048770161111, "learning_rate": 9.103912254153639e-06, "loss": 0.5814, "step": 3418 }, { "epoch": 0.44, "grad_norm": 1.091798701074009, "learning_rate": 9.10332288257344e-06, "loss": 0.629, "step": 3419 }, { "epoch": 0.44, "grad_norm": 0.7033812224130684, "learning_rate": 9.102733336327898e-06, "loss": 0.5506, "step": 3420 }, { "epoch": 0.44, "grad_norm": 0.7485385640187735, "learning_rate": 9.102143615442108e-06, "loss": 0.5998, "step": 3421 }, { "epoch": 0.44, "grad_norm": 0.8108791291501346, "learning_rate": 9.101553719941174e-06, "loss": 0.5812, "step": 3422 }, { "epoch": 0.44, "grad_norm": 0.7281258194228255, "learning_rate": 9.100963649850205e-06, "loss": 0.6093, "step": 3423 }, { "epoch": 0.44, "grad_norm": 0.62751963414827, "learning_rate": 9.100373405194315e-06, "loss": 0.529, "step": 3424 }, { "epoch": 0.44, "grad_norm": 0.830029602085821, "learning_rate": 9.099782985998636e-06, "loss": 0.5853, "step": 3425 }, { "epoch": 0.44, "grad_norm": 0.5672589507696492, "learning_rate": 9.099192392288294e-06, "loss": 0.5196, "step": 3426 }, { "epoch": 0.44, "grad_norm": 0.8414099568616669, "learning_rate": 9.098601624088431e-06, "loss": 0.5821, "step": 3427 }, { "epoch": 0.44, "grad_norm": 0.5549940027603152, "learning_rate": 9.098010681424195e-06, "loss": 0.5335, "step": 3428 }, { "epoch": 0.44, "grad_norm": 0.5860203072757566, "learning_rate": 9.09741956432074e-06, "loss": 0.5245, "step": 3429 }, { "epoch": 0.44, "grad_norm": 0.6877467215061256, "learning_rate": 9.096828272803226e-06, "loss": 0.5793, "step": 3430 }, { "epoch": 0.44, "grad_norm": 0.7067325107093925, "learning_rate": 9.096236806896826e-06, "loss": 0.5951, "step": 3431 }, { "epoch": 0.44, "grad_norm": 0.6761810453383481, "learning_rate": 9.095645166626715e-06, "loss": 0.5423, "step": 3432 }, { "epoch": 0.44, "grad_norm": 0.5536052962497934, "learning_rate": 9.095053352018075e-06, "loss": 0.4714, "step": 3433 }, { "epoch": 0.44, "grad_norm": 0.781149231770683, "learning_rate": 9.094461363096101e-06, "loss": 0.6311, "step": 3434 }, { "epoch": 0.44, "grad_norm": 0.8280277777453815, "learning_rate": 9.093869199885989e-06, "loss": 0.6185, "step": 3435 }, { "epoch": 0.44, "grad_norm": 0.5771242618740802, "learning_rate": 9.09327686241295e-06, "loss": 0.4675, "step": 3436 }, { "epoch": 0.44, "grad_norm": 0.617704913127233, "learning_rate": 9.092684350702194e-06, "loss": 0.6161, "step": 3437 }, { "epoch": 0.44, "grad_norm": 0.8166272347866761, "learning_rate": 9.092091664778944e-06, "loss": 0.6166, "step": 3438 }, { "epoch": 0.44, "grad_norm": 0.6073872660581949, "learning_rate": 9.091498804668427e-06, "loss": 0.5854, "step": 3439 }, { "epoch": 0.44, "grad_norm": 0.7709524855851002, "learning_rate": 9.090905770395881e-06, "loss": 0.6592, "step": 3440 }, { "epoch": 0.44, "grad_norm": 0.7446287029972817, "learning_rate": 9.09031256198655e-06, "loss": 0.5939, "step": 3441 }, { "epoch": 0.44, "grad_norm": 0.5762656640439561, "learning_rate": 9.089719179465684e-06, "loss": 0.5321, "step": 3442 }, { "epoch": 0.44, "grad_norm": 0.7922364264667471, "learning_rate": 9.089125622858538e-06, "loss": 0.6616, "step": 3443 }, { "epoch": 0.44, "grad_norm": 0.5957511470102311, "learning_rate": 9.088531892190387e-06, "loss": 0.5604, "step": 3444 }, { "epoch": 0.44, "grad_norm": 0.5609628154928852, "learning_rate": 9.087937987486495e-06, "loss": 0.4976, "step": 3445 }, { "epoch": 0.44, "grad_norm": 0.6642831355527429, "learning_rate": 9.087343908772147e-06, "loss": 0.5502, "step": 3446 }, { "epoch": 0.44, "grad_norm": 0.7742182344477593, "learning_rate": 9.086749656072628e-06, "loss": 0.6316, "step": 3447 }, { "epoch": 0.44, "grad_norm": 0.7032856889729958, "learning_rate": 9.086155229413237e-06, "loss": 0.5739, "step": 3448 }, { "epoch": 0.44, "grad_norm": 0.6828160605012141, "learning_rate": 9.085560628819275e-06, "loss": 0.6179, "step": 3449 }, { "epoch": 0.44, "grad_norm": 0.6665142563842354, "learning_rate": 9.084965854316053e-06, "loss": 0.5718, "step": 3450 }, { "epoch": 0.44, "grad_norm": 0.6591496051120823, "learning_rate": 9.084370905928888e-06, "loss": 0.5358, "step": 3451 }, { "epoch": 0.44, "grad_norm": 0.9074742145259302, "learning_rate": 9.083775783683107e-06, "loss": 0.6626, "step": 3452 }, { "epoch": 0.44, "grad_norm": 0.7297895037645014, "learning_rate": 9.083180487604037e-06, "loss": 0.5436, "step": 3453 }, { "epoch": 0.44, "grad_norm": 0.6099829711461648, "learning_rate": 9.082585017717025e-06, "loss": 0.5411, "step": 3454 }, { "epoch": 0.44, "grad_norm": 0.596012446359992, "learning_rate": 9.081989374047414e-06, "loss": 0.5547, "step": 3455 }, { "epoch": 0.44, "grad_norm": 0.6013586954300786, "learning_rate": 9.081393556620559e-06, "loss": 0.546, "step": 3456 }, { "epoch": 0.44, "grad_norm": 0.6037784928475926, "learning_rate": 9.080797565461823e-06, "loss": 0.5251, "step": 3457 }, { "epoch": 0.44, "grad_norm": 0.6740132365035242, "learning_rate": 9.080201400596576e-06, "loss": 0.5677, "step": 3458 }, { "epoch": 0.44, "grad_norm": 0.5920250294100176, "learning_rate": 9.079605062050192e-06, "loss": 0.4839, "step": 3459 }, { "epoch": 0.44, "grad_norm": 0.8994227572042438, "learning_rate": 9.079008549848059e-06, "loss": 0.6469, "step": 3460 }, { "epoch": 0.44, "grad_norm": 0.7040440667533507, "learning_rate": 9.078411864015564e-06, "loss": 0.554, "step": 3461 }, { "epoch": 0.44, "grad_norm": 0.7005283401069226, "learning_rate": 9.07781500457811e-06, "loss": 0.5902, "step": 3462 }, { "epoch": 0.44, "grad_norm": 0.6336861352541178, "learning_rate": 9.077217971561104e-06, "loss": 0.5874, "step": 3463 }, { "epoch": 0.44, "grad_norm": 0.6950969304149969, "learning_rate": 9.076620764989954e-06, "loss": 0.5774, "step": 3464 }, { "epoch": 0.44, "grad_norm": 0.663118890296144, "learning_rate": 9.076023384890088e-06, "loss": 0.5519, "step": 3465 }, { "epoch": 0.44, "grad_norm": 0.740456861741743, "learning_rate": 9.07542583128693e-06, "loss": 0.6554, "step": 3466 }, { "epoch": 0.44, "grad_norm": 0.9448633073039158, "learning_rate": 9.07482810420592e-06, "loss": 0.6324, "step": 3467 }, { "epoch": 0.44, "grad_norm": 0.6265449999592396, "learning_rate": 9.074230203672497e-06, "loss": 0.5207, "step": 3468 }, { "epoch": 0.44, "grad_norm": 0.773030939432317, "learning_rate": 9.073632129712113e-06, "loss": 0.6169, "step": 3469 }, { "epoch": 0.44, "grad_norm": 0.9168246701072386, "learning_rate": 9.073033882350228e-06, "loss": 0.6942, "step": 3470 }, { "epoch": 0.44, "grad_norm": 1.531811412093806, "learning_rate": 9.072435461612306e-06, "loss": 0.633, "step": 3471 }, { "epoch": 0.44, "grad_norm": 0.6088111774191548, "learning_rate": 9.07183686752382e-06, "loss": 0.5624, "step": 3472 }, { "epoch": 0.44, "grad_norm": 0.5718113399475073, "learning_rate": 9.07123810011025e-06, "loss": 0.5179, "step": 3473 }, { "epoch": 0.44, "grad_norm": 0.6586860404694903, "learning_rate": 9.070639159397083e-06, "loss": 0.5209, "step": 3474 }, { "epoch": 0.44, "grad_norm": 0.695078600559415, "learning_rate": 9.070040045409816e-06, "loss": 0.543, "step": 3475 }, { "epoch": 0.44, "grad_norm": 0.7903743500897177, "learning_rate": 9.06944075817395e-06, "loss": 0.5196, "step": 3476 }, { "epoch": 0.44, "grad_norm": 2.3566609457891983, "learning_rate": 9.068841297714995e-06, "loss": 0.6252, "step": 3477 }, { "epoch": 0.44, "grad_norm": 0.8800272974540738, "learning_rate": 9.068241664058468e-06, "loss": 0.6801, "step": 3478 }, { "epoch": 0.44, "grad_norm": 0.5566431404014974, "learning_rate": 9.067641857229894e-06, "loss": 0.5208, "step": 3479 }, { "epoch": 0.44, "grad_norm": 0.6205159678325204, "learning_rate": 9.067041877254804e-06, "loss": 0.5347, "step": 3480 }, { "epoch": 0.44, "grad_norm": 0.7549855744068191, "learning_rate": 9.066441724158739e-06, "loss": 0.5272, "step": 3481 }, { "epoch": 0.44, "grad_norm": 0.5513535047703215, "learning_rate": 9.065841397967244e-06, "loss": 0.5126, "step": 3482 }, { "epoch": 0.44, "grad_norm": 0.6202979237028544, "learning_rate": 9.065240898705872e-06, "loss": 0.5474, "step": 3483 }, { "epoch": 0.44, "grad_norm": 0.716092780306067, "learning_rate": 9.064640226400185e-06, "loss": 0.5148, "step": 3484 }, { "epoch": 0.44, "grad_norm": 0.6400446680115223, "learning_rate": 9.064039381075754e-06, "loss": 0.5406, "step": 3485 }, { "epoch": 0.44, "grad_norm": 0.622689900777675, "learning_rate": 9.063438362758152e-06, "loss": 0.5448, "step": 3486 }, { "epoch": 0.44, "grad_norm": 0.7307962940199746, "learning_rate": 9.062837171472964e-06, "loss": 0.5628, "step": 3487 }, { "epoch": 0.44, "grad_norm": 0.6741559708179777, "learning_rate": 9.062235807245782e-06, "loss": 0.6137, "step": 3488 }, { "epoch": 0.44, "grad_norm": 0.7451203092554143, "learning_rate": 9.061634270102203e-06, "loss": 0.6273, "step": 3489 }, { "epoch": 0.44, "grad_norm": 0.7588593424679776, "learning_rate": 9.06103256006783e-06, "loss": 0.6269, "step": 3490 }, { "epoch": 0.44, "grad_norm": 0.5981496309045811, "learning_rate": 9.06043067716828e-06, "loss": 0.5627, "step": 3491 }, { "epoch": 0.44, "grad_norm": 0.6738724834755675, "learning_rate": 9.05982862142917e-06, "loss": 0.6255, "step": 3492 }, { "epoch": 0.44, "grad_norm": 0.6156238216372788, "learning_rate": 9.05922639287613e-06, "loss": 0.5565, "step": 3493 }, { "epoch": 0.45, "grad_norm": 0.6418613128208774, "learning_rate": 9.058623991534792e-06, "loss": 0.5136, "step": 3494 }, { "epoch": 0.45, "grad_norm": 0.9065074820157594, "learning_rate": 9.058021417430802e-06, "loss": 0.6651, "step": 3495 }, { "epoch": 0.45, "grad_norm": 0.877923466513029, "learning_rate": 9.057418670589808e-06, "loss": 0.6275, "step": 3496 }, { "epoch": 0.45, "grad_norm": 0.6657402420828011, "learning_rate": 9.056815751037467e-06, "loss": 0.5542, "step": 3497 }, { "epoch": 0.45, "grad_norm": 0.8721493810906396, "learning_rate": 9.056212658799442e-06, "loss": 0.6611, "step": 3498 }, { "epoch": 0.45, "grad_norm": 0.6708404727325938, "learning_rate": 9.055609393901406e-06, "loss": 0.5685, "step": 3499 }, { "epoch": 0.45, "grad_norm": 0.8400889025363306, "learning_rate": 9.055005956369038e-06, "loss": 0.6319, "step": 3500 }, { "epoch": 0.45, "grad_norm": 0.9255944785305048, "learning_rate": 9.054402346228025e-06, "loss": 0.6464, "step": 3501 }, { "epoch": 0.45, "grad_norm": 0.6925266681120441, "learning_rate": 9.05379856350406e-06, "loss": 0.5551, "step": 3502 }, { "epoch": 0.45, "grad_norm": 0.7483439418040335, "learning_rate": 9.053194608222843e-06, "loss": 0.5183, "step": 3503 }, { "epoch": 0.45, "grad_norm": 0.6846736393015276, "learning_rate": 9.052590480410086e-06, "loss": 0.5299, "step": 3504 }, { "epoch": 0.45, "grad_norm": 0.6688169423039391, "learning_rate": 9.051986180091501e-06, "loss": 0.6147, "step": 3505 }, { "epoch": 0.45, "grad_norm": 0.8562968352471371, "learning_rate": 9.051381707292813e-06, "loss": 0.6629, "step": 3506 }, { "epoch": 0.45, "grad_norm": 0.5459875483422001, "learning_rate": 9.050777062039753e-06, "loss": 0.5054, "step": 3507 }, { "epoch": 0.45, "grad_norm": 0.7191218558112576, "learning_rate": 9.050172244358055e-06, "loss": 0.6282, "step": 3508 }, { "epoch": 0.45, "grad_norm": 0.9132885327842507, "learning_rate": 9.04956725427347e-06, "loss": 0.6207, "step": 3509 }, { "epoch": 0.45, "grad_norm": 0.6305026284423193, "learning_rate": 9.048962091811746e-06, "loss": 0.5662, "step": 3510 }, { "epoch": 0.45, "grad_norm": 0.792983350898886, "learning_rate": 9.048356756998645e-06, "loss": 0.6244, "step": 3511 }, { "epoch": 0.45, "grad_norm": 1.1881448784013917, "learning_rate": 9.047751249859935e-06, "loss": 0.6438, "step": 3512 }, { "epoch": 0.45, "grad_norm": 0.7654548079755309, "learning_rate": 9.047145570421387e-06, "loss": 0.5731, "step": 3513 }, { "epoch": 0.45, "grad_norm": 0.882883690809048, "learning_rate": 9.046539718708784e-06, "loss": 0.6784, "step": 3514 }, { "epoch": 0.45, "grad_norm": 0.6098511240638075, "learning_rate": 9.045933694747919e-06, "loss": 0.496, "step": 3515 }, { "epoch": 0.45, "grad_norm": 0.8982423895289703, "learning_rate": 9.045327498564584e-06, "loss": 0.6329, "step": 3516 }, { "epoch": 0.45, "grad_norm": 0.9235146585001086, "learning_rate": 9.044721130184584e-06, "loss": 0.5889, "step": 3517 }, { "epoch": 0.45, "grad_norm": 0.6330000143716848, "learning_rate": 9.04411458963373e-06, "loss": 0.5864, "step": 3518 }, { "epoch": 0.45, "grad_norm": 0.764318534633108, "learning_rate": 9.043507876937843e-06, "loss": 0.5963, "step": 3519 }, { "epoch": 0.45, "grad_norm": 0.8447809422758628, "learning_rate": 9.042900992122745e-06, "loss": 0.6525, "step": 3520 }, { "epoch": 0.45, "grad_norm": 0.6936724517910646, "learning_rate": 9.042293935214272e-06, "loss": 0.4891, "step": 3521 }, { "epoch": 0.45, "grad_norm": 0.6371401996807756, "learning_rate": 9.041686706238264e-06, "loss": 0.5634, "step": 3522 }, { "epoch": 0.45, "grad_norm": 0.5988503216968053, "learning_rate": 9.041079305220569e-06, "loss": 0.5076, "step": 3523 }, { "epoch": 0.45, "grad_norm": 0.9111324375489593, "learning_rate": 9.04047173218704e-06, "loss": 0.6961, "step": 3524 }, { "epoch": 0.45, "grad_norm": 1.0821068528264417, "learning_rate": 9.03986398716354e-06, "loss": 0.6123, "step": 3525 }, { "epoch": 0.45, "grad_norm": 0.6841654508660304, "learning_rate": 9.03925607017594e-06, "loss": 0.5856, "step": 3526 }, { "epoch": 0.45, "grad_norm": 0.8591711706905032, "learning_rate": 9.038647981250117e-06, "loss": 0.5892, "step": 3527 }, { "epoch": 0.45, "grad_norm": 0.8684315769543314, "learning_rate": 9.038039720411958e-06, "loss": 0.5494, "step": 3528 }, { "epoch": 0.45, "grad_norm": 1.1932004626529524, "learning_rate": 9.037431287687348e-06, "loss": 0.6754, "step": 3529 }, { "epoch": 0.45, "grad_norm": 0.5471167506236436, "learning_rate": 9.036822683102192e-06, "loss": 0.5416, "step": 3530 }, { "epoch": 0.45, "grad_norm": 1.3445573027619735, "learning_rate": 9.036213906682392e-06, "loss": 0.6121, "step": 3531 }, { "epoch": 0.45, "grad_norm": 0.8696630568214259, "learning_rate": 9.035604958453865e-06, "loss": 0.7065, "step": 3532 }, { "epoch": 0.45, "grad_norm": 0.738741077272047, "learning_rate": 9.034995838442533e-06, "loss": 0.6173, "step": 3533 }, { "epoch": 0.45, "grad_norm": 0.683246772680504, "learning_rate": 9.03438654667432e-06, "loss": 0.6086, "step": 3534 }, { "epoch": 0.45, "grad_norm": 0.6920271954019079, "learning_rate": 9.033777083175165e-06, "loss": 0.6018, "step": 3535 }, { "epoch": 0.45, "grad_norm": 0.7884672789407592, "learning_rate": 9.033167447971008e-06, "loss": 0.6032, "step": 3536 }, { "epoch": 0.45, "grad_norm": 0.5911953857692304, "learning_rate": 9.0325576410878e-06, "loss": 0.5322, "step": 3537 }, { "epoch": 0.45, "grad_norm": 0.6238313942336, "learning_rate": 9.031947662551502e-06, "loss": 0.5774, "step": 3538 }, { "epoch": 0.45, "grad_norm": 0.8525044260773812, "learning_rate": 9.031337512388076e-06, "loss": 0.6106, "step": 3539 }, { "epoch": 0.45, "grad_norm": 0.7503718152639232, "learning_rate": 9.030727190623495e-06, "loss": 0.5538, "step": 3540 }, { "epoch": 0.45, "grad_norm": 0.6340760337955164, "learning_rate": 9.030116697283735e-06, "loss": 0.5398, "step": 3541 }, { "epoch": 0.45, "grad_norm": 0.7525091210116114, "learning_rate": 9.029506032394787e-06, "loss": 0.5367, "step": 3542 }, { "epoch": 0.45, "grad_norm": 0.6724150312502559, "learning_rate": 9.028895195982643e-06, "loss": 0.5417, "step": 3543 }, { "epoch": 0.45, "grad_norm": 0.656939585729722, "learning_rate": 9.028284188073306e-06, "loss": 0.5082, "step": 3544 }, { "epoch": 0.45, "grad_norm": 0.6931507465832119, "learning_rate": 9.027673008692785e-06, "loss": 0.605, "step": 3545 }, { "epoch": 0.45, "grad_norm": 0.6786019314804285, "learning_rate": 9.027061657867094e-06, "loss": 0.5854, "step": 3546 }, { "epoch": 0.45, "grad_norm": 0.7182869959058398, "learning_rate": 9.026450135622254e-06, "loss": 0.6483, "step": 3547 }, { "epoch": 0.45, "grad_norm": 0.7704219718710181, "learning_rate": 9.0258384419843e-06, "loss": 0.601, "step": 3548 }, { "epoch": 0.45, "grad_norm": 0.5832354117182584, "learning_rate": 9.02522657697927e-06, "loss": 0.539, "step": 3549 }, { "epoch": 0.45, "grad_norm": 0.7130117431608659, "learning_rate": 9.024614540633205e-06, "loss": 0.5908, "step": 3550 }, { "epoch": 0.45, "grad_norm": 0.6324806673749507, "learning_rate": 9.02400233297216e-06, "loss": 0.5251, "step": 3551 }, { "epoch": 0.45, "grad_norm": 0.7298318214376163, "learning_rate": 9.023389954022196e-06, "loss": 0.6148, "step": 3552 }, { "epoch": 0.45, "grad_norm": 0.5603837477940324, "learning_rate": 9.02277740380938e-06, "loss": 0.5157, "step": 3553 }, { "epoch": 0.45, "grad_norm": 0.7181252072288951, "learning_rate": 9.022164682359783e-06, "loss": 0.5862, "step": 3554 }, { "epoch": 0.45, "grad_norm": 1.2766420117783404, "learning_rate": 9.021551789699487e-06, "loss": 0.5757, "step": 3555 }, { "epoch": 0.45, "grad_norm": 0.8044614780027008, "learning_rate": 9.020938725854585e-06, "loss": 0.6335, "step": 3556 }, { "epoch": 0.45, "grad_norm": 0.6934871265053367, "learning_rate": 9.020325490851169e-06, "loss": 0.5562, "step": 3557 }, { "epoch": 0.45, "grad_norm": 0.565211036478897, "learning_rate": 9.019712084715343e-06, "loss": 0.5114, "step": 3558 }, { "epoch": 0.45, "grad_norm": 0.7343693646958838, "learning_rate": 9.019098507473222e-06, "loss": 0.5775, "step": 3559 }, { "epoch": 0.45, "grad_norm": 0.8307521926904984, "learning_rate": 9.018484759150919e-06, "loss": 0.6105, "step": 3560 }, { "epoch": 0.45, "grad_norm": 0.6969391684108982, "learning_rate": 9.017870839774561e-06, "loss": 0.5618, "step": 3561 }, { "epoch": 0.45, "grad_norm": 0.7913594337955373, "learning_rate": 9.01725674937028e-06, "loss": 0.6236, "step": 3562 }, { "epoch": 0.45, "grad_norm": 0.7831484268905573, "learning_rate": 9.016642487964216e-06, "loss": 0.6271, "step": 3563 }, { "epoch": 0.45, "grad_norm": 0.7734479276434101, "learning_rate": 9.016028055582519e-06, "loss": 0.5629, "step": 3564 }, { "epoch": 0.45, "grad_norm": 0.9184675808772949, "learning_rate": 9.015413452251339e-06, "loss": 0.5913, "step": 3565 }, { "epoch": 0.45, "grad_norm": 0.9242821490871466, "learning_rate": 9.014798677996842e-06, "loss": 0.6394, "step": 3566 }, { "epoch": 0.45, "grad_norm": 0.5374049006998934, "learning_rate": 9.014183732845192e-06, "loss": 0.5085, "step": 3567 }, { "epoch": 0.45, "grad_norm": 0.5652246223121693, "learning_rate": 9.013568616822569e-06, "loss": 0.5485, "step": 3568 }, { "epoch": 0.45, "grad_norm": 0.736919600344496, "learning_rate": 9.012953329955155e-06, "loss": 0.6425, "step": 3569 }, { "epoch": 0.45, "grad_norm": 0.6557264578405426, "learning_rate": 9.012337872269142e-06, "loss": 0.4939, "step": 3570 }, { "epoch": 0.45, "grad_norm": 0.6350277987555162, "learning_rate": 9.011722243790728e-06, "loss": 0.598, "step": 3571 }, { "epoch": 0.46, "grad_norm": 0.8998325682651059, "learning_rate": 9.011106444546115e-06, "loss": 0.5951, "step": 3572 }, { "epoch": 0.46, "grad_norm": 0.854526313990166, "learning_rate": 9.010490474561522e-06, "loss": 0.6122, "step": 3573 }, { "epoch": 0.46, "grad_norm": 0.7132548594570698, "learning_rate": 9.009874333863163e-06, "loss": 0.5614, "step": 3574 }, { "epoch": 0.46, "grad_norm": 0.6080094417516144, "learning_rate": 9.009258022477268e-06, "loss": 0.4917, "step": 3575 }, { "epoch": 0.46, "grad_norm": 0.8485189554263428, "learning_rate": 9.00864154043007e-06, "loss": 0.6404, "step": 3576 }, { "epoch": 0.46, "grad_norm": 0.8277463297538589, "learning_rate": 9.008024887747813e-06, "loss": 0.5755, "step": 3577 }, { "epoch": 0.46, "grad_norm": 0.967668426314493, "learning_rate": 9.007408064456744e-06, "loss": 0.6513, "step": 3578 }, { "epoch": 0.46, "grad_norm": 0.6532989589050314, "learning_rate": 9.006791070583119e-06, "loss": 0.5663, "step": 3579 }, { "epoch": 0.46, "grad_norm": 0.6288425525090104, "learning_rate": 9.006173906153202e-06, "loss": 0.4971, "step": 3580 }, { "epoch": 0.46, "grad_norm": 0.7285857204167445, "learning_rate": 9.005556571193263e-06, "loss": 0.5593, "step": 3581 }, { "epoch": 0.46, "grad_norm": 0.6763956316419001, "learning_rate": 9.004939065729581e-06, "loss": 0.5647, "step": 3582 }, { "epoch": 0.46, "grad_norm": 0.8032770269219566, "learning_rate": 9.004321389788442e-06, "loss": 0.6445, "step": 3583 }, { "epoch": 0.46, "grad_norm": 0.969687943895717, "learning_rate": 9.003703543396137e-06, "loss": 0.6297, "step": 3584 }, { "epoch": 0.46, "grad_norm": 0.5827599786857293, "learning_rate": 9.003085526578964e-06, "loss": 0.5076, "step": 3585 }, { "epoch": 0.46, "grad_norm": 0.6571114061701214, "learning_rate": 9.002467339363234e-06, "loss": 0.5265, "step": 3586 }, { "epoch": 0.46, "grad_norm": 0.8832216698744784, "learning_rate": 9.001848981775261e-06, "loss": 0.5551, "step": 3587 }, { "epoch": 0.46, "grad_norm": 0.7237677051629329, "learning_rate": 9.001230453841363e-06, "loss": 0.5705, "step": 3588 }, { "epoch": 0.46, "grad_norm": 0.8385872730792924, "learning_rate": 9.000611755587872e-06, "loss": 0.6383, "step": 3589 }, { "epoch": 0.46, "grad_norm": 0.8945242828773745, "learning_rate": 8.99999288704112e-06, "loss": 0.6061, "step": 3590 }, { "epoch": 0.46, "grad_norm": 0.7118769071711949, "learning_rate": 8.999373848227455e-06, "loss": 0.5276, "step": 3591 }, { "epoch": 0.46, "grad_norm": 0.5566838614263709, "learning_rate": 8.998754639173225e-06, "loss": 0.5062, "step": 3592 }, { "epoch": 0.46, "grad_norm": 0.6515824007018469, "learning_rate": 8.998135259904787e-06, "loss": 0.5533, "step": 3593 }, { "epoch": 0.46, "grad_norm": 0.7030632564794325, "learning_rate": 8.99751571044851e-06, "loss": 0.5867, "step": 3594 }, { "epoch": 0.46, "grad_norm": 0.7117488516645356, "learning_rate": 8.996895990830763e-06, "loss": 0.5591, "step": 3595 }, { "epoch": 0.46, "grad_norm": 0.6918029758605921, "learning_rate": 8.996276101077924e-06, "loss": 0.5465, "step": 3596 }, { "epoch": 0.46, "grad_norm": 0.6188471235386569, "learning_rate": 8.995656041216383e-06, "loss": 0.5254, "step": 3597 }, { "epoch": 0.46, "grad_norm": 0.6665565780111213, "learning_rate": 8.995035811272533e-06, "loss": 0.5108, "step": 3598 }, { "epoch": 0.46, "grad_norm": 0.6798739320660234, "learning_rate": 8.994415411272772e-06, "loss": 0.5485, "step": 3599 }, { "epoch": 0.46, "grad_norm": 0.5883844255105386, "learning_rate": 8.993794841243513e-06, "loss": 0.4893, "step": 3600 }, { "epoch": 0.46, "grad_norm": 0.8426013484882545, "learning_rate": 8.99317410121117e-06, "loss": 0.6526, "step": 3601 }, { "epoch": 0.46, "grad_norm": 0.9301523420229989, "learning_rate": 8.992553191202168e-06, "loss": 0.6579, "step": 3602 }, { "epoch": 0.46, "grad_norm": 0.6248214648633588, "learning_rate": 8.991932111242933e-06, "loss": 0.5348, "step": 3603 }, { "epoch": 0.46, "grad_norm": 0.8076042973562217, "learning_rate": 8.991310861359904e-06, "loss": 0.539, "step": 3604 }, { "epoch": 0.46, "grad_norm": 0.7605421082766961, "learning_rate": 8.990689441579526e-06, "loss": 0.6825, "step": 3605 }, { "epoch": 0.46, "grad_norm": 0.7127814112166351, "learning_rate": 8.990067851928252e-06, "loss": 0.5872, "step": 3606 }, { "epoch": 0.46, "grad_norm": 0.5811292756002087, "learning_rate": 8.98944609243254e-06, "loss": 0.5591, "step": 3607 }, { "epoch": 0.46, "grad_norm": 0.5792800572504391, "learning_rate": 8.988824163118854e-06, "loss": 0.5058, "step": 3608 }, { "epoch": 0.46, "grad_norm": 0.7089904525701487, "learning_rate": 8.988202064013673e-06, "loss": 0.5812, "step": 3609 }, { "epoch": 0.46, "grad_norm": 0.7294877951057975, "learning_rate": 8.987579795143473e-06, "loss": 0.5835, "step": 3610 }, { "epoch": 0.46, "grad_norm": 0.6887990728814826, "learning_rate": 8.986957356534742e-06, "loss": 0.5641, "step": 3611 }, { "epoch": 0.46, "grad_norm": 0.7191118812383622, "learning_rate": 8.98633474821398e-06, "loss": 0.5935, "step": 3612 }, { "epoch": 0.46, "grad_norm": 0.6515297198485607, "learning_rate": 8.985711970207685e-06, "loss": 0.5361, "step": 3613 }, { "epoch": 0.46, "grad_norm": 0.9157814976397483, "learning_rate": 8.985089022542367e-06, "loss": 0.6844, "step": 3614 }, { "epoch": 0.46, "grad_norm": 0.7650958676349688, "learning_rate": 8.984465905244546e-06, "loss": 0.6407, "step": 3615 }, { "epoch": 0.46, "grad_norm": 2.024815730251743, "learning_rate": 8.983842618340742e-06, "loss": 0.6333, "step": 3616 }, { "epoch": 0.46, "grad_norm": 0.5460686408606807, "learning_rate": 8.983219161857489e-06, "loss": 0.5028, "step": 3617 }, { "epoch": 0.46, "grad_norm": 0.6485308670297574, "learning_rate": 8.982595535821324e-06, "loss": 0.55, "step": 3618 }, { "epoch": 0.46, "grad_norm": 0.6363369782361266, "learning_rate": 8.981971740258795e-06, "loss": 0.5457, "step": 3619 }, { "epoch": 0.46, "grad_norm": 0.6064298546329495, "learning_rate": 8.981347775196452e-06, "loss": 0.572, "step": 3620 }, { "epoch": 0.46, "grad_norm": 0.750851122173302, "learning_rate": 8.980723640660857e-06, "loss": 0.5652, "step": 3621 }, { "epoch": 0.46, "grad_norm": 0.6175940383720777, "learning_rate": 8.980099336678577e-06, "loss": 0.5109, "step": 3622 }, { "epoch": 0.46, "grad_norm": 0.6855898550667417, "learning_rate": 8.979474863276188e-06, "loss": 0.5377, "step": 3623 }, { "epoch": 0.46, "grad_norm": 0.5832630637423732, "learning_rate": 8.97885022048027e-06, "loss": 0.5287, "step": 3624 }, { "epoch": 0.46, "grad_norm": 0.6370558039230712, "learning_rate": 8.978225408317414e-06, "loss": 0.5674, "step": 3625 }, { "epoch": 0.46, "grad_norm": 0.7609008271334486, "learning_rate": 8.977600426814215e-06, "loss": 0.6438, "step": 3626 }, { "epoch": 0.46, "grad_norm": 0.6273958182773944, "learning_rate": 8.976975275997275e-06, "loss": 0.5573, "step": 3627 }, { "epoch": 0.46, "grad_norm": 0.9226487919657396, "learning_rate": 8.976349955893206e-06, "loss": 0.6231, "step": 3628 }, { "epoch": 0.46, "grad_norm": 0.6128143406081149, "learning_rate": 8.975724466528626e-06, "loss": 0.5613, "step": 3629 }, { "epoch": 0.46, "grad_norm": 0.6398142618401653, "learning_rate": 8.97509880793016e-06, "loss": 0.5611, "step": 3630 }, { "epoch": 0.46, "grad_norm": 0.7264243852564435, "learning_rate": 8.974472980124443e-06, "loss": 0.5442, "step": 3631 }, { "epoch": 0.46, "grad_norm": 0.5936566974062466, "learning_rate": 8.973846983138107e-06, "loss": 0.5127, "step": 3632 }, { "epoch": 0.46, "grad_norm": 0.6381544609829135, "learning_rate": 8.973220816997808e-06, "loss": 0.5251, "step": 3633 }, { "epoch": 0.46, "grad_norm": 0.6156416391308108, "learning_rate": 8.972594481730192e-06, "loss": 0.5123, "step": 3634 }, { "epoch": 0.46, "grad_norm": 0.5261340639641087, "learning_rate": 8.971967977361925e-06, "loss": 0.5031, "step": 3635 }, { "epoch": 0.46, "grad_norm": 0.788753557283552, "learning_rate": 8.971341303919676e-06, "loss": 0.6076, "step": 3636 }, { "epoch": 0.46, "grad_norm": 0.7121461437090074, "learning_rate": 8.970714461430116e-06, "loss": 0.5194, "step": 3637 }, { "epoch": 0.46, "grad_norm": 0.6242108132600825, "learning_rate": 8.970087449919929e-06, "loss": 0.5454, "step": 3638 }, { "epoch": 0.46, "grad_norm": 0.8317702218257098, "learning_rate": 8.969460269415807e-06, "loss": 0.6484, "step": 3639 }, { "epoch": 0.46, "grad_norm": 0.6835746484984608, "learning_rate": 8.968832919944446e-06, "loss": 0.5384, "step": 3640 }, { "epoch": 0.46, "grad_norm": 0.6736306798476158, "learning_rate": 8.96820540153255e-06, "loss": 0.5284, "step": 3641 }, { "epoch": 0.46, "grad_norm": 1.0457220740733495, "learning_rate": 8.967577714206832e-06, "loss": 0.6925, "step": 3642 }, { "epoch": 0.46, "grad_norm": 0.7113487787701108, "learning_rate": 8.966949857994007e-06, "loss": 0.6277, "step": 3643 }, { "epoch": 0.46, "grad_norm": 0.7970221435192456, "learning_rate": 8.966321832920804e-06, "loss": 0.6078, "step": 3644 }, { "epoch": 0.46, "grad_norm": 0.7886578393286339, "learning_rate": 8.965693639013955e-06, "loss": 0.5752, "step": 3645 }, { "epoch": 0.46, "grad_norm": 0.7332633846795106, "learning_rate": 8.965065276300202e-06, "loss": 0.6483, "step": 3646 }, { "epoch": 0.46, "grad_norm": 0.7502611305161161, "learning_rate": 8.96443674480629e-06, "loss": 0.5996, "step": 3647 }, { "epoch": 0.46, "grad_norm": 0.6608765133593425, "learning_rate": 8.963808044558972e-06, "loss": 0.5511, "step": 3648 }, { "epoch": 0.46, "grad_norm": 0.7846595181403333, "learning_rate": 8.963179175585012e-06, "loss": 0.5486, "step": 3649 }, { "epoch": 0.46, "grad_norm": 0.9753311951927206, "learning_rate": 8.962550137911182e-06, "loss": 0.5919, "step": 3650 }, { "epoch": 0.47, "grad_norm": 0.6435971415344093, "learning_rate": 8.961920931564255e-06, "loss": 0.5739, "step": 3651 }, { "epoch": 0.47, "grad_norm": 0.7819153363536666, "learning_rate": 8.961291556571012e-06, "loss": 0.644, "step": 3652 }, { "epoch": 0.47, "grad_norm": 0.8680582180989188, "learning_rate": 8.960662012958247e-06, "loss": 0.6545, "step": 3653 }, { "epoch": 0.47, "grad_norm": 0.698502228725985, "learning_rate": 8.960032300752756e-06, "loss": 0.5272, "step": 3654 }, { "epoch": 0.47, "grad_norm": 0.6257151794084105, "learning_rate": 8.959402419981346e-06, "loss": 0.5477, "step": 3655 }, { "epoch": 0.47, "grad_norm": 1.308083973214287, "learning_rate": 8.958772370670826e-06, "loss": 0.6393, "step": 3656 }, { "epoch": 0.47, "grad_norm": 0.7685872453673079, "learning_rate": 8.958142152848017e-06, "loss": 0.6066, "step": 3657 }, { "epoch": 0.47, "grad_norm": 0.7596884286710909, "learning_rate": 8.957511766539745e-06, "loss": 0.5981, "step": 3658 }, { "epoch": 0.47, "grad_norm": 0.7544052857735296, "learning_rate": 8.956881211772845e-06, "loss": 0.6317, "step": 3659 }, { "epoch": 0.47, "grad_norm": 0.8116571271042058, "learning_rate": 8.956250488574155e-06, "loss": 0.6562, "step": 3660 }, { "epoch": 0.47, "grad_norm": 0.8595715639648566, "learning_rate": 8.955619596970523e-06, "loss": 0.6507, "step": 3661 }, { "epoch": 0.47, "grad_norm": 0.6162109935608634, "learning_rate": 8.954988536988806e-06, "loss": 0.5304, "step": 3662 }, { "epoch": 0.47, "grad_norm": 0.7534583776145457, "learning_rate": 8.954357308655866e-06, "loss": 0.6389, "step": 3663 }, { "epoch": 0.47, "grad_norm": 0.7576891010065256, "learning_rate": 8.95372591199857e-06, "loss": 0.6503, "step": 3664 }, { "epoch": 0.47, "grad_norm": 0.5698501475385936, "learning_rate": 8.953094347043797e-06, "loss": 0.5285, "step": 3665 }, { "epoch": 0.47, "grad_norm": 0.695740224806512, "learning_rate": 8.95246261381843e-06, "loss": 0.5967, "step": 3666 }, { "epoch": 0.47, "grad_norm": 0.6306413817937664, "learning_rate": 8.95183071234936e-06, "loss": 0.6064, "step": 3667 }, { "epoch": 0.47, "grad_norm": 0.5847056946508924, "learning_rate": 8.951198642663485e-06, "loss": 0.5417, "step": 3668 }, { "epoch": 0.47, "grad_norm": 0.7661917869993571, "learning_rate": 8.95056640478771e-06, "loss": 0.6035, "step": 3669 }, { "epoch": 0.47, "grad_norm": 0.7672207861555538, "learning_rate": 8.949933998748946e-06, "loss": 0.5935, "step": 3670 }, { "epoch": 0.47, "grad_norm": 0.7525470670952771, "learning_rate": 8.949301424574115e-06, "loss": 0.6315, "step": 3671 }, { "epoch": 0.47, "grad_norm": 0.7143112124804997, "learning_rate": 8.948668682290142e-06, "loss": 0.5592, "step": 3672 }, { "epoch": 0.47, "grad_norm": 0.7831403294970477, "learning_rate": 8.948035771923961e-06, "loss": 0.6157, "step": 3673 }, { "epoch": 0.47, "grad_norm": 0.6399385467758807, "learning_rate": 8.947402693502515e-06, "loss": 0.4654, "step": 3674 }, { "epoch": 0.47, "grad_norm": 0.8733134726545604, "learning_rate": 8.94676944705275e-06, "loss": 0.6258, "step": 3675 }, { "epoch": 0.47, "grad_norm": 0.7460259667296016, "learning_rate": 8.946136032601623e-06, "loss": 0.5559, "step": 3676 }, { "epoch": 0.47, "grad_norm": 0.8508096676925495, "learning_rate": 8.945502450176092e-06, "loss": 0.5815, "step": 3677 }, { "epoch": 0.47, "grad_norm": 0.6754333686919946, "learning_rate": 8.944868699803131e-06, "loss": 0.5236, "step": 3678 }, { "epoch": 0.47, "grad_norm": 0.5879271451746475, "learning_rate": 8.944234781509715e-06, "loss": 0.5497, "step": 3679 }, { "epoch": 0.47, "grad_norm": 0.8066598560244146, "learning_rate": 8.94360069532283e-06, "loss": 0.5931, "step": 3680 }, { "epoch": 0.47, "grad_norm": 0.589504278045563, "learning_rate": 8.942966441269464e-06, "loss": 0.491, "step": 3681 }, { "epoch": 0.47, "grad_norm": 0.5845327895512482, "learning_rate": 8.942332019376616e-06, "loss": 0.5332, "step": 3682 }, { "epoch": 0.47, "grad_norm": 0.7246144716836019, "learning_rate": 8.941697429671292e-06, "loss": 0.6354, "step": 3683 }, { "epoch": 0.47, "grad_norm": 0.5774564684260933, "learning_rate": 8.941062672180504e-06, "loss": 0.526, "step": 3684 }, { "epoch": 0.47, "grad_norm": 0.823466105597688, "learning_rate": 8.940427746931273e-06, "loss": 0.5982, "step": 3685 }, { "epoch": 0.47, "grad_norm": 1.261700497687932, "learning_rate": 8.939792653950625e-06, "loss": 0.6182, "step": 3686 }, { "epoch": 0.47, "grad_norm": 0.6459124300892567, "learning_rate": 8.939157393265595e-06, "loss": 0.5445, "step": 3687 }, { "epoch": 0.47, "grad_norm": 0.6322548075569477, "learning_rate": 8.93852196490322e-06, "loss": 0.5482, "step": 3688 }, { "epoch": 0.47, "grad_norm": 0.797144074997912, "learning_rate": 8.93788636889055e-06, "loss": 0.5824, "step": 3689 }, { "epoch": 0.47, "grad_norm": 0.746353141671459, "learning_rate": 8.937250605254641e-06, "loss": 0.5989, "step": 3690 }, { "epoch": 0.47, "grad_norm": 0.8253001390495809, "learning_rate": 8.936614674022556e-06, "loss": 0.6158, "step": 3691 }, { "epoch": 0.47, "grad_norm": 0.8076365924064134, "learning_rate": 8.935978575221366e-06, "loss": 0.6325, "step": 3692 }, { "epoch": 0.47, "grad_norm": 0.5585774221205613, "learning_rate": 8.935342308878144e-06, "loss": 0.4789, "step": 3693 }, { "epoch": 0.47, "grad_norm": 0.6422880908803965, "learning_rate": 8.934705875019976e-06, "loss": 0.563, "step": 3694 }, { "epoch": 0.47, "grad_norm": 0.9356429250556946, "learning_rate": 8.934069273673951e-06, "loss": 0.5969, "step": 3695 }, { "epoch": 0.47, "grad_norm": 0.6924930173721241, "learning_rate": 8.93343250486717e-06, "loss": 0.5042, "step": 3696 }, { "epoch": 0.47, "grad_norm": 0.6793333757864841, "learning_rate": 8.932795568626736e-06, "loss": 0.4747, "step": 3697 }, { "epoch": 0.47, "grad_norm": 0.6395061693133614, "learning_rate": 8.93215846497976e-06, "loss": 0.5652, "step": 3698 }, { "epoch": 0.47, "grad_norm": 0.789979768524907, "learning_rate": 8.931521193953367e-06, "loss": 0.573, "step": 3699 }, { "epoch": 0.47, "grad_norm": 0.9383309760443929, "learning_rate": 8.930883755574678e-06, "loss": 0.6093, "step": 3700 }, { "epoch": 0.47, "grad_norm": 0.7544976213925937, "learning_rate": 8.93024614987083e-06, "loss": 0.676, "step": 3701 }, { "epoch": 0.47, "grad_norm": 0.7721047466971459, "learning_rate": 8.929608376868963e-06, "loss": 0.63, "step": 3702 }, { "epoch": 0.47, "grad_norm": 0.5767679134176217, "learning_rate": 8.928970436596223e-06, "loss": 0.4625, "step": 3703 }, { "epoch": 0.47, "grad_norm": 0.7562580186526595, "learning_rate": 8.928332329079767e-06, "loss": 0.6102, "step": 3704 }, { "epoch": 0.47, "grad_norm": 0.588729134635355, "learning_rate": 8.927694054346758e-06, "loss": 0.5789, "step": 3705 }, { "epoch": 0.47, "grad_norm": 0.7680495111791352, "learning_rate": 8.927055612424365e-06, "loss": 0.6219, "step": 3706 }, { "epoch": 0.47, "grad_norm": 0.6400531904276301, "learning_rate": 8.926417003339762e-06, "loss": 0.5259, "step": 3707 }, { "epoch": 0.47, "grad_norm": 0.6218218356598285, "learning_rate": 8.925778227120135e-06, "loss": 0.5129, "step": 3708 }, { "epoch": 0.47, "grad_norm": 0.7326078904729264, "learning_rate": 8.925139283792672e-06, "loss": 0.6119, "step": 3709 }, { "epoch": 0.47, "grad_norm": 0.6507012789068991, "learning_rate": 8.924500173384575e-06, "loss": 0.5466, "step": 3710 }, { "epoch": 0.47, "grad_norm": 0.8024968920410219, "learning_rate": 8.923860895923045e-06, "loss": 0.6062, "step": 3711 }, { "epoch": 0.47, "grad_norm": 0.6992840744481165, "learning_rate": 8.923221451435297e-06, "loss": 0.565, "step": 3712 }, { "epoch": 0.47, "grad_norm": 0.6309224522411508, "learning_rate": 8.922581839948548e-06, "loss": 0.5373, "step": 3713 }, { "epoch": 0.47, "grad_norm": 0.7646701275992417, "learning_rate": 8.921942061490023e-06, "loss": 0.6289, "step": 3714 }, { "epoch": 0.47, "grad_norm": 0.7521383474272808, "learning_rate": 8.92130211608696e-06, "loss": 0.6059, "step": 3715 }, { "epoch": 0.47, "grad_norm": 0.8684102885098306, "learning_rate": 8.920662003766595e-06, "loss": 0.6571, "step": 3716 }, { "epoch": 0.47, "grad_norm": 0.7495562426728156, "learning_rate": 8.920021724556178e-06, "loss": 0.6, "step": 3717 }, { "epoch": 0.47, "grad_norm": 0.6088179970275477, "learning_rate": 8.919381278482962e-06, "loss": 0.5447, "step": 3718 }, { "epoch": 0.47, "grad_norm": 0.8082923425081123, "learning_rate": 8.91874066557421e-06, "loss": 0.6528, "step": 3719 }, { "epoch": 0.47, "grad_norm": 0.5844725058247551, "learning_rate": 8.91809988585719e-06, "loss": 0.5033, "step": 3720 }, { "epoch": 0.47, "grad_norm": 0.9171774230903313, "learning_rate": 8.917458939359178e-06, "loss": 0.648, "step": 3721 }, { "epoch": 0.47, "grad_norm": 0.6941438210718455, "learning_rate": 8.91681782610746e-06, "loss": 0.5536, "step": 3722 }, { "epoch": 0.47, "grad_norm": 0.6281803328648323, "learning_rate": 8.91617654612932e-06, "loss": 0.4978, "step": 3723 }, { "epoch": 0.47, "grad_norm": 0.6482814929124665, "learning_rate": 8.915535099452061e-06, "loss": 0.538, "step": 3724 }, { "epoch": 0.47, "grad_norm": 0.7974226398465755, "learning_rate": 8.914893486102983e-06, "loss": 0.5559, "step": 3725 }, { "epoch": 0.47, "grad_norm": 0.6514447406437415, "learning_rate": 8.914251706109402e-06, "loss": 0.5573, "step": 3726 }, { "epoch": 0.47, "grad_norm": 0.8363222865577461, "learning_rate": 8.913609759498631e-06, "loss": 0.6463, "step": 3727 }, { "epoch": 0.47, "grad_norm": 0.9874203579039861, "learning_rate": 8.912967646298001e-06, "loss": 0.6479, "step": 3728 }, { "epoch": 0.48, "grad_norm": 0.8192879669837126, "learning_rate": 8.912325366534842e-06, "loss": 0.6297, "step": 3729 }, { "epoch": 0.48, "grad_norm": 0.5820980775361936, "learning_rate": 8.911682920236495e-06, "loss": 0.5041, "step": 3730 }, { "epoch": 0.48, "grad_norm": 0.5925154836169583, "learning_rate": 8.911040307430303e-06, "loss": 0.5716, "step": 3731 }, { "epoch": 0.48, "grad_norm": 0.9268843463803044, "learning_rate": 8.910397528143627e-06, "loss": 0.6361, "step": 3732 }, { "epoch": 0.48, "grad_norm": 0.8574553628574446, "learning_rate": 8.909754582403822e-06, "loss": 0.6125, "step": 3733 }, { "epoch": 0.48, "grad_norm": 1.0145060814788536, "learning_rate": 8.909111470238256e-06, "loss": 0.6787, "step": 3734 }, { "epoch": 0.48, "grad_norm": 0.5704184117156543, "learning_rate": 8.90846819167431e-06, "loss": 0.5245, "step": 3735 }, { "epoch": 0.48, "grad_norm": 1.3110162758325523, "learning_rate": 8.907824746739363e-06, "loss": 0.6501, "step": 3736 }, { "epoch": 0.48, "grad_norm": 0.9043482548097569, "learning_rate": 8.9071811354608e-06, "loss": 0.6681, "step": 3737 }, { "epoch": 0.48, "grad_norm": 0.709447598432559, "learning_rate": 8.906537357866026e-06, "loss": 0.5274, "step": 3738 }, { "epoch": 0.48, "grad_norm": 1.0643688626522934, "learning_rate": 8.905893413982438e-06, "loss": 0.6129, "step": 3739 }, { "epoch": 0.48, "grad_norm": 0.6010955615352654, "learning_rate": 8.905249303837448e-06, "loss": 0.588, "step": 3740 }, { "epoch": 0.48, "grad_norm": 0.6190471340716149, "learning_rate": 8.904605027458477e-06, "loss": 0.5585, "step": 3741 }, { "epoch": 0.48, "grad_norm": 0.8350867052162392, "learning_rate": 8.903960584872945e-06, "loss": 0.653, "step": 3742 }, { "epoch": 0.48, "grad_norm": 0.6357271986644341, "learning_rate": 8.90331597610829e-06, "loss": 0.538, "step": 3743 }, { "epoch": 0.48, "grad_norm": 0.9068142955416113, "learning_rate": 8.90267120119194e-06, "loss": 0.6197, "step": 3744 }, { "epoch": 0.48, "grad_norm": 0.6219985719635462, "learning_rate": 8.902026260151355e-06, "loss": 0.5764, "step": 3745 }, { "epoch": 0.48, "grad_norm": 0.7068264692512304, "learning_rate": 8.90138115301398e-06, "loss": 0.5794, "step": 3746 }, { "epoch": 0.48, "grad_norm": 0.7212069131587737, "learning_rate": 8.900735879807274e-06, "loss": 0.5856, "step": 3747 }, { "epoch": 0.48, "grad_norm": 0.8011842512755848, "learning_rate": 8.900090440558706e-06, "loss": 0.6483, "step": 3748 }, { "epoch": 0.48, "grad_norm": 0.7802673096194498, "learning_rate": 8.899444835295754e-06, "loss": 0.6121, "step": 3749 }, { "epoch": 0.48, "grad_norm": 0.8161764962167143, "learning_rate": 8.898799064045895e-06, "loss": 0.6432, "step": 3750 }, { "epoch": 0.48, "grad_norm": 0.7955542280150687, "learning_rate": 8.898153126836618e-06, "loss": 0.6138, "step": 3751 }, { "epoch": 0.48, "grad_norm": 0.6364499303774556, "learning_rate": 8.897507023695418e-06, "loss": 0.5281, "step": 3752 }, { "epoch": 0.48, "grad_norm": 0.819956479796958, "learning_rate": 8.896860754649801e-06, "loss": 0.65, "step": 3753 }, { "epoch": 0.48, "grad_norm": 0.6882934163630536, "learning_rate": 8.896214319727273e-06, "loss": 0.611, "step": 3754 }, { "epoch": 0.48, "grad_norm": 0.71638994144973, "learning_rate": 8.895567718955353e-06, "loss": 0.5825, "step": 3755 }, { "epoch": 0.48, "grad_norm": 0.722479875569419, "learning_rate": 8.894920952361562e-06, "loss": 0.6253, "step": 3756 }, { "epoch": 0.48, "grad_norm": 1.1841776943865747, "learning_rate": 8.894274019973433e-06, "loss": 0.6373, "step": 3757 }, { "epoch": 0.48, "grad_norm": 0.7555028605921302, "learning_rate": 8.893626921818504e-06, "loss": 0.5653, "step": 3758 }, { "epoch": 0.48, "grad_norm": 0.7476453365424197, "learning_rate": 8.89297965792432e-06, "loss": 0.6065, "step": 3759 }, { "epoch": 0.48, "grad_norm": 0.5876891237970107, "learning_rate": 8.89233222831843e-06, "loss": 0.5155, "step": 3760 }, { "epoch": 0.48, "grad_norm": 0.5492884386908883, "learning_rate": 8.891684633028397e-06, "loss": 0.4743, "step": 3761 }, { "epoch": 0.48, "grad_norm": 0.6953441699789937, "learning_rate": 8.891036872081784e-06, "loss": 0.5283, "step": 3762 }, { "epoch": 0.48, "grad_norm": 0.5973827674200589, "learning_rate": 8.890388945506166e-06, "loss": 0.5016, "step": 3763 }, { "epoch": 0.48, "grad_norm": 0.7751216169597506, "learning_rate": 8.889740853329121e-06, "loss": 0.5599, "step": 3764 }, { "epoch": 0.48, "grad_norm": 0.8330327571955368, "learning_rate": 8.88909259557824e-06, "loss": 0.5825, "step": 3765 }, { "epoch": 0.48, "grad_norm": 0.7576639230977771, "learning_rate": 8.888444172281112e-06, "loss": 0.6222, "step": 3766 }, { "epoch": 0.48, "grad_norm": 0.639892378999344, "learning_rate": 8.887795583465343e-06, "loss": 0.5417, "step": 3767 }, { "epoch": 0.48, "grad_norm": 0.7382001861606764, "learning_rate": 8.88714682915854e-06, "loss": 0.6214, "step": 3768 }, { "epoch": 0.48, "grad_norm": 0.8397368465145428, "learning_rate": 8.886497909388318e-06, "loss": 0.697, "step": 3769 }, { "epoch": 0.48, "grad_norm": 0.6448698628371455, "learning_rate": 8.885848824182299e-06, "loss": 0.5623, "step": 3770 }, { "epoch": 0.48, "grad_norm": 0.5877626713984844, "learning_rate": 8.885199573568113e-06, "loss": 0.5625, "step": 3771 }, { "epoch": 0.48, "grad_norm": 0.7280144982047488, "learning_rate": 8.884550157573398e-06, "loss": 0.5979, "step": 3772 }, { "epoch": 0.48, "grad_norm": 4.846620867906846, "learning_rate": 8.883900576225795e-06, "loss": 0.6208, "step": 3773 }, { "epoch": 0.48, "grad_norm": 0.9117724489057908, "learning_rate": 8.883250829552954e-06, "loss": 0.5988, "step": 3774 }, { "epoch": 0.48, "grad_norm": 0.6069774680218666, "learning_rate": 8.882600917582535e-06, "loss": 0.4871, "step": 3775 }, { "epoch": 0.48, "grad_norm": 0.6308346196193754, "learning_rate": 8.881950840342204e-06, "loss": 0.5341, "step": 3776 }, { "epoch": 0.48, "grad_norm": 0.692961978011267, "learning_rate": 8.881300597859628e-06, "loss": 0.5472, "step": 3777 }, { "epoch": 0.48, "grad_norm": 0.6725794310638088, "learning_rate": 8.88065019016249e-06, "loss": 0.5911, "step": 3778 }, { "epoch": 0.48, "grad_norm": 0.7646460223700365, "learning_rate": 8.879999617278474e-06, "loss": 0.5878, "step": 3779 }, { "epoch": 0.48, "grad_norm": 1.3992359439150255, "learning_rate": 8.879348879235273e-06, "loss": 0.6499, "step": 3780 }, { "epoch": 0.48, "grad_norm": 1.5100420033806663, "learning_rate": 8.878697976060587e-06, "loss": 0.6436, "step": 3781 }, { "epoch": 0.48, "grad_norm": 0.6949835926994373, "learning_rate": 8.878046907782123e-06, "loss": 0.5458, "step": 3782 }, { "epoch": 0.48, "grad_norm": 0.7426340637441344, "learning_rate": 8.877395674427594e-06, "loss": 0.5895, "step": 3783 }, { "epoch": 0.48, "grad_norm": 0.9767401982713976, "learning_rate": 8.87674427602472e-06, "loss": 0.5982, "step": 3784 }, { "epoch": 0.48, "grad_norm": 0.739690809890258, "learning_rate": 8.876092712601232e-06, "loss": 0.6252, "step": 3785 }, { "epoch": 0.48, "grad_norm": 0.535623539253013, "learning_rate": 8.875440984184865e-06, "loss": 0.5018, "step": 3786 }, { "epoch": 0.48, "grad_norm": 0.6071631986355673, "learning_rate": 8.874789090803357e-06, "loss": 0.5932, "step": 3787 }, { "epoch": 0.48, "grad_norm": 0.743789965590836, "learning_rate": 8.87413703248446e-06, "loss": 0.6023, "step": 3788 }, { "epoch": 0.48, "grad_norm": 0.6378897141467588, "learning_rate": 8.87348480925593e-06, "loss": 0.5606, "step": 3789 }, { "epoch": 0.48, "grad_norm": 0.7544153680391198, "learning_rate": 8.87283242114553e-06, "loss": 0.6267, "step": 3790 }, { "epoch": 0.48, "grad_norm": 0.694397722754607, "learning_rate": 8.872179868181028e-06, "loss": 0.6275, "step": 3791 }, { "epoch": 0.48, "grad_norm": 0.8056110231682484, "learning_rate": 8.871527150390205e-06, "loss": 0.5115, "step": 3792 }, { "epoch": 0.48, "grad_norm": 0.7209354821466226, "learning_rate": 8.870874267800839e-06, "loss": 0.5651, "step": 3793 }, { "epoch": 0.48, "grad_norm": 0.6068038272840759, "learning_rate": 8.870221220440729e-06, "loss": 0.5354, "step": 3794 }, { "epoch": 0.48, "grad_norm": 0.6011028532546561, "learning_rate": 8.869568008337669e-06, "loss": 0.5603, "step": 3795 }, { "epoch": 0.48, "grad_norm": 1.0632137333295957, "learning_rate": 8.86891463151946e-06, "loss": 0.6775, "step": 3796 }, { "epoch": 0.48, "grad_norm": 0.6163072991381189, "learning_rate": 8.868261090013923e-06, "loss": 0.5296, "step": 3797 }, { "epoch": 0.48, "grad_norm": 0.6114030165969825, "learning_rate": 8.86760738384887e-06, "loss": 0.5308, "step": 3798 }, { "epoch": 0.48, "grad_norm": 0.5891162191896753, "learning_rate": 8.86695351305213e-06, "loss": 0.5534, "step": 3799 }, { "epoch": 0.48, "grad_norm": 0.7547945331350864, "learning_rate": 8.866299477651538e-06, "loss": 0.5845, "step": 3800 }, { "epoch": 0.48, "grad_norm": 0.5839010840818549, "learning_rate": 8.86564527767493e-06, "loss": 0.4936, "step": 3801 }, { "epoch": 0.48, "grad_norm": 0.851915511901835, "learning_rate": 8.864990913150157e-06, "loss": 0.6214, "step": 3802 }, { "epoch": 0.48, "grad_norm": 0.6491400211590191, "learning_rate": 8.86433638410507e-06, "loss": 0.5591, "step": 3803 }, { "epoch": 0.48, "grad_norm": 0.6449044857682525, "learning_rate": 8.863681690567533e-06, "loss": 0.5224, "step": 3804 }, { "epoch": 0.48, "grad_norm": 0.6280119347772116, "learning_rate": 8.863026832565412e-06, "loss": 0.5718, "step": 3805 }, { "epoch": 0.48, "grad_norm": 0.594692193545, "learning_rate": 8.862371810126584e-06, "loss": 0.5003, "step": 3806 }, { "epoch": 0.48, "grad_norm": 0.6855556235983449, "learning_rate": 8.86171662327893e-06, "loss": 0.5872, "step": 3807 }, { "epoch": 0.49, "grad_norm": 0.7636411056548783, "learning_rate": 8.861061272050339e-06, "loss": 0.6026, "step": 3808 }, { "epoch": 0.49, "grad_norm": 0.6804317876899881, "learning_rate": 8.860405756468709e-06, "loss": 0.5311, "step": 3809 }, { "epoch": 0.49, "grad_norm": 0.756783970454099, "learning_rate": 8.859750076561942e-06, "loss": 0.6205, "step": 3810 }, { "epoch": 0.49, "grad_norm": 0.7552420388713613, "learning_rate": 8.859094232357948e-06, "loss": 0.6087, "step": 3811 }, { "epoch": 0.49, "grad_norm": 0.8209982992660904, "learning_rate": 8.858438223884644e-06, "loss": 0.6055, "step": 3812 }, { "epoch": 0.49, "grad_norm": 0.6056568586874923, "learning_rate": 8.857782051169956e-06, "loss": 0.5688, "step": 3813 }, { "epoch": 0.49, "grad_norm": 0.8394050062298642, "learning_rate": 8.857125714241812e-06, "loss": 0.6111, "step": 3814 }, { "epoch": 0.49, "grad_norm": 0.6203192329381239, "learning_rate": 8.856469213128153e-06, "loss": 0.5345, "step": 3815 }, { "epoch": 0.49, "grad_norm": 0.694377448647183, "learning_rate": 8.855812547856923e-06, "loss": 0.5889, "step": 3816 }, { "epoch": 0.49, "grad_norm": 0.8583079935692607, "learning_rate": 8.855155718456075e-06, "loss": 0.6116, "step": 3817 }, { "epoch": 0.49, "grad_norm": 0.7022409878542383, "learning_rate": 8.854498724953564e-06, "loss": 0.6024, "step": 3818 }, { "epoch": 0.49, "grad_norm": 0.7305438770594196, "learning_rate": 8.853841567377363e-06, "loss": 0.5816, "step": 3819 }, { "epoch": 0.49, "grad_norm": 0.6651848706841397, "learning_rate": 8.853184245755439e-06, "loss": 0.5932, "step": 3820 }, { "epoch": 0.49, "grad_norm": 0.8874623586818131, "learning_rate": 8.852526760115778e-06, "loss": 0.6175, "step": 3821 }, { "epoch": 0.49, "grad_norm": 0.6640870272319327, "learning_rate": 8.851869110486362e-06, "loss": 0.534, "step": 3822 }, { "epoch": 0.49, "grad_norm": 0.5720754296337881, "learning_rate": 8.851211296895186e-06, "loss": 0.4845, "step": 3823 }, { "epoch": 0.49, "grad_norm": 0.8111147186094624, "learning_rate": 8.850553319370252e-06, "loss": 0.6294, "step": 3824 }, { "epoch": 0.49, "grad_norm": 0.7863860785012166, "learning_rate": 8.84989517793957e-06, "loss": 0.6354, "step": 3825 }, { "epoch": 0.49, "grad_norm": 0.8385381607676841, "learning_rate": 8.84923687263115e-06, "loss": 0.6259, "step": 3826 }, { "epoch": 0.49, "grad_norm": 0.7224318901767393, "learning_rate": 8.848578403473018e-06, "loss": 0.5923, "step": 3827 }, { "epoch": 0.49, "grad_norm": 0.8064197154174032, "learning_rate": 8.847919770493201e-06, "loss": 0.6581, "step": 3828 }, { "epoch": 0.49, "grad_norm": 0.570652831295744, "learning_rate": 8.847260973719736e-06, "loss": 0.562, "step": 3829 }, { "epoch": 0.49, "grad_norm": 0.7335012936634536, "learning_rate": 8.846602013180666e-06, "loss": 0.5797, "step": 3830 }, { "epoch": 0.49, "grad_norm": 0.6228570584062498, "learning_rate": 8.845942888904038e-06, "loss": 0.5717, "step": 3831 }, { "epoch": 0.49, "grad_norm": 0.7002958417312181, "learning_rate": 8.845283600917914e-06, "loss": 0.5549, "step": 3832 }, { "epoch": 0.49, "grad_norm": 0.636369277306478, "learning_rate": 8.844624149250354e-06, "loss": 0.6098, "step": 3833 }, { "epoch": 0.49, "grad_norm": 0.7456172052339775, "learning_rate": 8.84396453392943e-06, "loss": 0.5924, "step": 3834 }, { "epoch": 0.49, "grad_norm": 0.9141506628012072, "learning_rate": 8.843304754983217e-06, "loss": 0.6215, "step": 3835 }, { "epoch": 0.49, "grad_norm": 0.9676529684471039, "learning_rate": 8.842644812439804e-06, "loss": 0.6414, "step": 3836 }, { "epoch": 0.49, "grad_norm": 0.7605335745927064, "learning_rate": 8.84198470632728e-06, "loss": 0.6494, "step": 3837 }, { "epoch": 0.49, "grad_norm": 0.7200340824015131, "learning_rate": 8.841324436673745e-06, "loss": 0.6169, "step": 3838 }, { "epoch": 0.49, "grad_norm": 0.6287154856209565, "learning_rate": 8.840664003507304e-06, "loss": 0.5316, "step": 3839 }, { "epoch": 0.49, "grad_norm": 0.8146697148983737, "learning_rate": 8.840003406856067e-06, "loss": 0.5974, "step": 3840 }, { "epoch": 0.49, "grad_norm": 0.7181890454293451, "learning_rate": 8.83934264674816e-06, "loss": 0.5717, "step": 3841 }, { "epoch": 0.49, "grad_norm": 0.6613350860317138, "learning_rate": 8.838681723211701e-06, "loss": 0.5262, "step": 3842 }, { "epoch": 0.49, "grad_norm": 0.791346358642499, "learning_rate": 8.838020636274832e-06, "loss": 0.6149, "step": 3843 }, { "epoch": 0.49, "grad_norm": 0.7061119137167481, "learning_rate": 8.837359385965688e-06, "loss": 0.5473, "step": 3844 }, { "epoch": 0.49, "grad_norm": 0.8103306329770703, "learning_rate": 8.836697972312413e-06, "loss": 0.6177, "step": 3845 }, { "epoch": 0.49, "grad_norm": 0.765865468795884, "learning_rate": 8.83603639534317e-06, "loss": 0.6611, "step": 3846 }, { "epoch": 0.49, "grad_norm": 1.1176111088317007, "learning_rate": 8.835374655086116e-06, "loss": 0.5709, "step": 3847 }, { "epoch": 0.49, "grad_norm": 0.5733228264454907, "learning_rate": 8.834712751569417e-06, "loss": 0.4513, "step": 3848 }, { "epoch": 0.49, "grad_norm": 0.8420986077281761, "learning_rate": 8.834050684821251e-06, "loss": 0.5911, "step": 3849 }, { "epoch": 0.49, "grad_norm": 0.615357020898463, "learning_rate": 8.8333884548698e-06, "loss": 0.5186, "step": 3850 }, { "epoch": 0.49, "grad_norm": 0.6185145452469081, "learning_rate": 8.83272606174325e-06, "loss": 0.5413, "step": 3851 }, { "epoch": 0.49, "grad_norm": 0.7200094600005802, "learning_rate": 8.832063505469801e-06, "loss": 0.5959, "step": 3852 }, { "epoch": 0.49, "grad_norm": 0.5578276948046608, "learning_rate": 8.831400786077653e-06, "loss": 0.5372, "step": 3853 }, { "epoch": 0.49, "grad_norm": 0.5840033030101617, "learning_rate": 8.830737903595019e-06, "loss": 0.5608, "step": 3854 }, { "epoch": 0.49, "grad_norm": 0.8529316521621446, "learning_rate": 8.830074858050111e-06, "loss": 0.6426, "step": 3855 }, { "epoch": 0.49, "grad_norm": 0.5460277659603923, "learning_rate": 8.829411649471158e-06, "loss": 0.5459, "step": 3856 }, { "epoch": 0.49, "grad_norm": 0.6497799968015209, "learning_rate": 8.828748277886386e-06, "loss": 0.5793, "step": 3857 }, { "epoch": 0.49, "grad_norm": 0.6160836682642195, "learning_rate": 8.828084743324036e-06, "loss": 0.5312, "step": 3858 }, { "epoch": 0.49, "grad_norm": 0.5674052953999604, "learning_rate": 8.82742104581235e-06, "loss": 0.506, "step": 3859 }, { "epoch": 0.49, "grad_norm": 0.6026918906289411, "learning_rate": 8.826757185379582e-06, "loss": 0.5994, "step": 3860 }, { "epoch": 0.49, "grad_norm": 0.6268201867888585, "learning_rate": 8.826093162053989e-06, "loss": 0.4832, "step": 3861 }, { "epoch": 0.49, "grad_norm": 0.7433241407554696, "learning_rate": 8.825428975863836e-06, "loss": 0.5958, "step": 3862 }, { "epoch": 0.49, "grad_norm": 0.78294957505671, "learning_rate": 8.824764626837397e-06, "loss": 0.6129, "step": 3863 }, { "epoch": 0.49, "grad_norm": 0.7157255879477162, "learning_rate": 8.82410011500295e-06, "loss": 0.5746, "step": 3864 }, { "epoch": 0.49, "grad_norm": 0.6111326643348116, "learning_rate": 8.823435440388779e-06, "loss": 0.475, "step": 3865 }, { "epoch": 0.49, "grad_norm": 0.6872514168090972, "learning_rate": 8.822770603023181e-06, "loss": 0.5695, "step": 3866 }, { "epoch": 0.49, "grad_norm": 0.8509162722683828, "learning_rate": 8.822105602934454e-06, "loss": 0.6404, "step": 3867 }, { "epoch": 0.49, "grad_norm": 0.6578410806098519, "learning_rate": 8.821440440150906e-06, "loss": 0.5242, "step": 3868 }, { "epoch": 0.49, "grad_norm": 0.6987518175746289, "learning_rate": 8.82077511470085e-06, "loss": 0.5508, "step": 3869 }, { "epoch": 0.49, "grad_norm": 1.0894242529125457, "learning_rate": 8.820109626612604e-06, "loss": 0.6254, "step": 3870 }, { "epoch": 0.49, "grad_norm": 0.853879069248099, "learning_rate": 8.8194439759145e-06, "loss": 0.6686, "step": 3871 }, { "epoch": 0.49, "grad_norm": 0.8424280450962335, "learning_rate": 8.818778162634874e-06, "loss": 0.6449, "step": 3872 }, { "epoch": 0.49, "grad_norm": 0.8413090703550339, "learning_rate": 8.818112186802062e-06, "loss": 0.6777, "step": 3873 }, { "epoch": 0.49, "grad_norm": 0.6179861901530256, "learning_rate": 8.817446048444415e-06, "loss": 0.5722, "step": 3874 }, { "epoch": 0.49, "grad_norm": 0.6634789003231849, "learning_rate": 8.816779747590289e-06, "loss": 0.5636, "step": 3875 }, { "epoch": 0.49, "grad_norm": 0.7289076672701938, "learning_rate": 8.816113284268046e-06, "loss": 0.5762, "step": 3876 }, { "epoch": 0.49, "grad_norm": 0.6968440077589, "learning_rate": 8.815446658506056e-06, "loss": 0.5641, "step": 3877 }, { "epoch": 0.49, "grad_norm": 0.5641354164248852, "learning_rate": 8.814779870332692e-06, "loss": 0.5449, "step": 3878 }, { "epoch": 0.49, "grad_norm": 0.6193433571350114, "learning_rate": 8.81411291977634e-06, "loss": 0.5039, "step": 3879 }, { "epoch": 0.49, "grad_norm": 0.8229145202184409, "learning_rate": 8.81344580686539e-06, "loss": 0.637, "step": 3880 }, { "epoch": 0.49, "grad_norm": 0.8346751370246499, "learning_rate": 8.812778531628238e-06, "loss": 0.6622, "step": 3881 }, { "epoch": 0.49, "grad_norm": 1.1992556023933005, "learning_rate": 8.812111094093287e-06, "loss": 0.6383, "step": 3882 }, { "epoch": 0.49, "grad_norm": 0.5229181416011706, "learning_rate": 8.811443494288949e-06, "loss": 0.5322, "step": 3883 }, { "epoch": 0.49, "grad_norm": 0.5361558155327227, "learning_rate": 8.81077573224364e-06, "loss": 0.5464, "step": 3884 }, { "epoch": 0.49, "grad_norm": 0.680848544029737, "learning_rate": 8.810107807985787e-06, "loss": 0.6104, "step": 3885 }, { "epoch": 0.5, "grad_norm": 0.7053434025936305, "learning_rate": 8.80943972154382e-06, "loss": 0.5591, "step": 3886 }, { "epoch": 0.5, "grad_norm": 0.763889628092451, "learning_rate": 8.808771472946176e-06, "loss": 0.5878, "step": 3887 }, { "epoch": 0.5, "grad_norm": 0.7282180768583473, "learning_rate": 8.808103062221303e-06, "loss": 0.5763, "step": 3888 }, { "epoch": 0.5, "grad_norm": 0.8156709587893172, "learning_rate": 8.80743448939765e-06, "loss": 0.6678, "step": 3889 }, { "epoch": 0.5, "grad_norm": 0.6750448945648101, "learning_rate": 8.806765754503679e-06, "loss": 0.568, "step": 3890 }, { "epoch": 0.5, "grad_norm": 0.6642955876761903, "learning_rate": 8.806096857567854e-06, "loss": 0.5051, "step": 3891 }, { "epoch": 0.5, "grad_norm": 0.7798986842305553, "learning_rate": 8.805427798618647e-06, "loss": 0.6624, "step": 3892 }, { "epoch": 0.5, "grad_norm": 0.5693023559753447, "learning_rate": 8.804758577684541e-06, "loss": 0.5417, "step": 3893 }, { "epoch": 0.5, "grad_norm": 0.7554695739250997, "learning_rate": 8.80408919479402e-06, "loss": 0.6065, "step": 3894 }, { "epoch": 0.5, "grad_norm": 0.7597579230078431, "learning_rate": 8.803419649975577e-06, "loss": 0.5305, "step": 3895 }, { "epoch": 0.5, "grad_norm": 0.6083369015618603, "learning_rate": 8.802749943257715e-06, "loss": 0.5535, "step": 3896 }, { "epoch": 0.5, "grad_norm": 0.6535379816174423, "learning_rate": 8.80208007466894e-06, "loss": 0.6076, "step": 3897 }, { "epoch": 0.5, "grad_norm": 0.6099357229501594, "learning_rate": 8.801410044237765e-06, "loss": 0.5075, "step": 3898 }, { "epoch": 0.5, "grad_norm": 0.8653682857511228, "learning_rate": 8.800739851992712e-06, "loss": 0.6629, "step": 3899 }, { "epoch": 0.5, "grad_norm": 0.7785614415931186, "learning_rate": 8.800069497962309e-06, "loss": 0.5511, "step": 3900 }, { "epoch": 0.5, "grad_norm": 0.8210474196867106, "learning_rate": 8.799398982175091e-06, "loss": 0.6493, "step": 3901 }, { "epoch": 0.5, "grad_norm": 0.8364813898871233, "learning_rate": 8.7987283046596e-06, "loss": 0.6044, "step": 3902 }, { "epoch": 0.5, "grad_norm": 0.6456465765628633, "learning_rate": 8.798057465444383e-06, "loss": 0.5518, "step": 3903 }, { "epoch": 0.5, "grad_norm": 0.6409540453409056, "learning_rate": 8.797386464557998e-06, "loss": 0.5093, "step": 3904 }, { "epoch": 0.5, "grad_norm": 0.7971425482188506, "learning_rate": 8.796715302029004e-06, "loss": 0.6072, "step": 3905 }, { "epoch": 0.5, "grad_norm": 0.8016859291483994, "learning_rate": 8.796043977885973e-06, "loss": 0.5869, "step": 3906 }, { "epoch": 0.5, "grad_norm": 0.8663166529864562, "learning_rate": 8.79537249215748e-06, "loss": 0.6501, "step": 3907 }, { "epoch": 0.5, "grad_norm": 0.6453729917214116, "learning_rate": 8.794700844872108e-06, "loss": 0.5609, "step": 3908 }, { "epoch": 0.5, "grad_norm": 0.6642357020406892, "learning_rate": 8.794029036058449e-06, "loss": 0.5729, "step": 3909 }, { "epoch": 0.5, "grad_norm": 0.6688130520578079, "learning_rate": 8.793357065745097e-06, "loss": 0.6118, "step": 3910 }, { "epoch": 0.5, "grad_norm": 0.8056793290954218, "learning_rate": 8.792684933960656e-06, "loss": 0.6105, "step": 3911 }, { "epoch": 0.5, "grad_norm": 0.793202100360878, "learning_rate": 8.792012640733735e-06, "loss": 0.6141, "step": 3912 }, { "epoch": 0.5, "grad_norm": 0.6599718294437324, "learning_rate": 8.791340186092958e-06, "loss": 0.5557, "step": 3913 }, { "epoch": 0.5, "grad_norm": 0.841302491480118, "learning_rate": 8.790667570066941e-06, "loss": 0.6534, "step": 3914 }, { "epoch": 0.5, "grad_norm": 0.6025201812345489, "learning_rate": 8.78999479268432e-06, "loss": 0.4833, "step": 3915 }, { "epoch": 0.5, "grad_norm": 0.814955347674252, "learning_rate": 8.789321853973733e-06, "loss": 0.677, "step": 3916 }, { "epoch": 0.5, "grad_norm": 0.7262445888644592, "learning_rate": 8.788648753963822e-06, "loss": 0.6205, "step": 3917 }, { "epoch": 0.5, "grad_norm": 0.7006669001515851, "learning_rate": 8.78797549268324e-06, "loss": 0.5315, "step": 3918 }, { "epoch": 0.5, "grad_norm": 0.8567688768015841, "learning_rate": 8.787302070160649e-06, "loss": 0.6288, "step": 3919 }, { "epoch": 0.5, "grad_norm": 0.5519427008134669, "learning_rate": 8.786628486424707e-06, "loss": 0.5202, "step": 3920 }, { "epoch": 0.5, "grad_norm": 0.7393250649816075, "learning_rate": 8.785954741504093e-06, "loss": 0.6294, "step": 3921 }, { "epoch": 0.5, "grad_norm": 0.6297069959304996, "learning_rate": 8.785280835427484e-06, "loss": 0.4981, "step": 3922 }, { "epoch": 0.5, "grad_norm": 0.6528378035469024, "learning_rate": 8.784606768223566e-06, "loss": 0.5373, "step": 3923 }, { "epoch": 0.5, "grad_norm": 0.6227212886003048, "learning_rate": 8.783932539921032e-06, "loss": 0.5402, "step": 3924 }, { "epoch": 0.5, "grad_norm": 0.9937598494539905, "learning_rate": 8.78325815054858e-06, "loss": 0.599, "step": 3925 }, { "epoch": 0.5, "grad_norm": 0.6215995681617628, "learning_rate": 8.782583600134918e-06, "loss": 0.5335, "step": 3926 }, { "epoch": 0.5, "grad_norm": 0.5380554764527031, "learning_rate": 8.78190888870876e-06, "loss": 0.4775, "step": 3927 }, { "epoch": 0.5, "grad_norm": 0.7026124895736224, "learning_rate": 8.781234016298828e-06, "loss": 0.5746, "step": 3928 }, { "epoch": 0.5, "grad_norm": 0.8598823617613709, "learning_rate": 8.780558982933844e-06, "loss": 0.6042, "step": 3929 }, { "epoch": 0.5, "grad_norm": 2.8975364148059026, "learning_rate": 8.779883788642547e-06, "loss": 0.6304, "step": 3930 }, { "epoch": 0.5, "grad_norm": 0.541520899074446, "learning_rate": 8.779208433453674e-06, "loss": 0.5239, "step": 3931 }, { "epoch": 0.5, "grad_norm": 0.710982995919678, "learning_rate": 8.778532917395976e-06, "loss": 0.6096, "step": 3932 }, { "epoch": 0.5, "grad_norm": 0.5236664120263389, "learning_rate": 8.777857240498207e-06, "loss": 0.4722, "step": 3933 }, { "epoch": 0.5, "grad_norm": 0.7267115813270354, "learning_rate": 8.777181402789126e-06, "loss": 0.6277, "step": 3934 }, { "epoch": 0.5, "grad_norm": 0.7040182474563481, "learning_rate": 8.776505404297505e-06, "loss": 0.6227, "step": 3935 }, { "epoch": 0.5, "grad_norm": 0.5660328735296014, "learning_rate": 8.775829245052115e-06, "loss": 0.516, "step": 3936 }, { "epoch": 0.5, "grad_norm": 0.9056767207848451, "learning_rate": 8.775152925081738e-06, "loss": 0.6336, "step": 3937 }, { "epoch": 0.5, "grad_norm": 0.565275260062294, "learning_rate": 8.774476444415168e-06, "loss": 0.4978, "step": 3938 }, { "epoch": 0.5, "grad_norm": 0.8544456932806831, "learning_rate": 8.773799803081195e-06, "loss": 0.6114, "step": 3939 }, { "epoch": 0.5, "grad_norm": 0.7471981205121471, "learning_rate": 8.773123001108626e-06, "loss": 0.5687, "step": 3940 }, { "epoch": 0.5, "grad_norm": 0.7788288350058924, "learning_rate": 8.772446038526267e-06, "loss": 0.6382, "step": 3941 }, { "epoch": 0.5, "grad_norm": 0.7295741329091895, "learning_rate": 8.771768915362935e-06, "loss": 0.5073, "step": 3942 }, { "epoch": 0.5, "grad_norm": 0.7935413544226886, "learning_rate": 8.771091631647453e-06, "loss": 0.5818, "step": 3943 }, { "epoch": 0.5, "grad_norm": 0.9755369019121798, "learning_rate": 8.770414187408652e-06, "loss": 0.6647, "step": 3944 }, { "epoch": 0.5, "grad_norm": 0.8530256658483412, "learning_rate": 8.769736582675366e-06, "loss": 0.6504, "step": 3945 }, { "epoch": 0.5, "grad_norm": 0.7088817180572102, "learning_rate": 8.76905881747644e-06, "loss": 0.6092, "step": 3946 }, { "epoch": 0.5, "grad_norm": 0.6121614392287799, "learning_rate": 8.768380891840725e-06, "loss": 0.5421, "step": 3947 }, { "epoch": 0.5, "grad_norm": 0.8196487573531597, "learning_rate": 8.767702805797077e-06, "loss": 0.6215, "step": 3948 }, { "epoch": 0.5, "grad_norm": 0.6000699850241337, "learning_rate": 8.767024559374361e-06, "loss": 0.5263, "step": 3949 }, { "epoch": 0.5, "grad_norm": 0.6605492076345316, "learning_rate": 8.766346152601448e-06, "loss": 0.53, "step": 3950 }, { "epoch": 0.5, "grad_norm": 0.6352723081872792, "learning_rate": 8.765667585507213e-06, "loss": 0.5504, "step": 3951 }, { "epoch": 0.5, "grad_norm": 0.7852571521634043, "learning_rate": 8.764988858120543e-06, "loss": 0.641, "step": 3952 }, { "epoch": 0.5, "grad_norm": 0.7361326200022199, "learning_rate": 8.764309970470328e-06, "loss": 0.6594, "step": 3953 }, { "epoch": 0.5, "grad_norm": 0.6980304144144914, "learning_rate": 8.763630922585466e-06, "loss": 0.6162, "step": 3954 }, { "epoch": 0.5, "grad_norm": 0.6626658153353427, "learning_rate": 8.762951714494864e-06, "loss": 0.5584, "step": 3955 }, { "epoch": 0.5, "grad_norm": 0.6939803321819855, "learning_rate": 8.762272346227434e-06, "loss": 0.5594, "step": 3956 }, { "epoch": 0.5, "grad_norm": 0.7735382705157501, "learning_rate": 8.761592817812088e-06, "loss": 0.6308, "step": 3957 }, { "epoch": 0.5, "grad_norm": 0.8090253150963491, "learning_rate": 8.76091312927776e-06, "loss": 0.6188, "step": 3958 }, { "epoch": 0.5, "grad_norm": 0.6500013847522571, "learning_rate": 8.760233280653376e-06, "loss": 0.5905, "step": 3959 }, { "epoch": 0.5, "grad_norm": 0.6895783995054142, "learning_rate": 8.75955327196788e-06, "loss": 0.5169, "step": 3960 }, { "epoch": 0.5, "grad_norm": 0.5468811719940511, "learning_rate": 8.758873103250212e-06, "loss": 0.4966, "step": 3961 }, { "epoch": 0.5, "grad_norm": 0.7887595464839658, "learning_rate": 8.758192774529328e-06, "loss": 0.6637, "step": 3962 }, { "epoch": 0.5, "grad_norm": 1.7138163027372428, "learning_rate": 8.757512285834189e-06, "loss": 0.6375, "step": 3963 }, { "epoch": 0.51, "grad_norm": 0.5599752231453183, "learning_rate": 8.756831637193757e-06, "loss": 0.5507, "step": 3964 }, { "epoch": 0.51, "grad_norm": 0.6959909932970919, "learning_rate": 8.756150828637009e-06, "loss": 0.5288, "step": 3965 }, { "epoch": 0.51, "grad_norm": 0.7716403889418847, "learning_rate": 8.755469860192922e-06, "loss": 0.5618, "step": 3966 }, { "epoch": 0.51, "grad_norm": 0.7329369573913461, "learning_rate": 8.754788731890485e-06, "loss": 0.5584, "step": 3967 }, { "epoch": 0.51, "grad_norm": 0.7316888058876729, "learning_rate": 8.75410744375869e-06, "loss": 0.6067, "step": 3968 }, { "epoch": 0.51, "grad_norm": 0.5722472749697322, "learning_rate": 8.753425995826536e-06, "loss": 0.5167, "step": 3969 }, { "epoch": 0.51, "grad_norm": 0.5915106949795744, "learning_rate": 8.752744388123033e-06, "loss": 0.5552, "step": 3970 }, { "epoch": 0.51, "grad_norm": 0.8081715364208715, "learning_rate": 8.752062620677193e-06, "loss": 0.6373, "step": 3971 }, { "epoch": 0.51, "grad_norm": 0.5530192994616905, "learning_rate": 8.751380693518038e-06, "loss": 0.4732, "step": 3972 }, { "epoch": 0.51, "grad_norm": 0.5950488609759187, "learning_rate": 8.750698606674594e-06, "loss": 0.5268, "step": 3973 }, { "epoch": 0.51, "grad_norm": 0.5749555327536146, "learning_rate": 8.750016360175895e-06, "loss": 0.5284, "step": 3974 }, { "epoch": 0.51, "grad_norm": 0.6571820300636001, "learning_rate": 8.749333954050984e-06, "loss": 0.6087, "step": 3975 }, { "epoch": 0.51, "grad_norm": 0.6477038814961695, "learning_rate": 8.748651388328906e-06, "loss": 0.535, "step": 3976 }, { "epoch": 0.51, "grad_norm": 0.6708654495194948, "learning_rate": 8.74796866303872e-06, "loss": 0.568, "step": 3977 }, { "epoch": 0.51, "grad_norm": 0.7169526277350821, "learning_rate": 8.747285778209483e-06, "loss": 0.5198, "step": 3978 }, { "epoch": 0.51, "grad_norm": 0.7714452218504663, "learning_rate": 8.746602733870266e-06, "loss": 0.6246, "step": 3979 }, { "epoch": 0.51, "grad_norm": 0.7184550479829447, "learning_rate": 8.745919530050143e-06, "loss": 0.5991, "step": 3980 }, { "epoch": 0.51, "grad_norm": 0.8511114247254772, "learning_rate": 8.745236166778194e-06, "loss": 0.6768, "step": 3981 }, { "epoch": 0.51, "grad_norm": 0.5887325762540285, "learning_rate": 8.744552644083512e-06, "loss": 0.512, "step": 3982 }, { "epoch": 0.51, "grad_norm": 0.7314092283086151, "learning_rate": 8.743868961995188e-06, "loss": 0.4886, "step": 3983 }, { "epoch": 0.51, "grad_norm": 0.6158300606820595, "learning_rate": 8.743185120542326e-06, "loss": 0.5424, "step": 3984 }, { "epoch": 0.51, "grad_norm": 0.6551168389002009, "learning_rate": 8.742501119754037e-06, "loss": 0.5373, "step": 3985 }, { "epoch": 0.51, "grad_norm": 0.573471529393463, "learning_rate": 8.741816959659432e-06, "loss": 0.5122, "step": 3986 }, { "epoch": 0.51, "grad_norm": 0.6910089267339331, "learning_rate": 8.741132640287639e-06, "loss": 0.5785, "step": 3987 }, { "epoch": 0.51, "grad_norm": 0.6899036001471719, "learning_rate": 8.740448161667783e-06, "loss": 0.5408, "step": 3988 }, { "epoch": 0.51, "grad_norm": 0.7321408321882751, "learning_rate": 8.739763523829001e-06, "loss": 0.5501, "step": 3989 }, { "epoch": 0.51, "grad_norm": 0.7112818807731353, "learning_rate": 8.739078726800436e-06, "loss": 0.5512, "step": 3990 }, { "epoch": 0.51, "grad_norm": 1.1155330792726104, "learning_rate": 8.73839377061124e-06, "loss": 0.6421, "step": 3991 }, { "epoch": 0.51, "grad_norm": 0.5606726973898964, "learning_rate": 8.737708655290564e-06, "loss": 0.5187, "step": 3992 }, { "epoch": 0.51, "grad_norm": 0.7522452464776879, "learning_rate": 8.737023380867578e-06, "loss": 0.5994, "step": 3993 }, { "epoch": 0.51, "grad_norm": 0.9150160363986606, "learning_rate": 8.736337947371448e-06, "loss": 0.6838, "step": 3994 }, { "epoch": 0.51, "grad_norm": 0.7507945652532474, "learning_rate": 8.73565235483135e-06, "loss": 0.5765, "step": 3995 }, { "epoch": 0.51, "grad_norm": 0.5785208228204933, "learning_rate": 8.73496660327647e-06, "loss": 0.5373, "step": 3996 }, { "epoch": 0.51, "grad_norm": 0.5357985068475164, "learning_rate": 8.734280692735995e-06, "loss": 0.5086, "step": 3997 }, { "epoch": 0.51, "grad_norm": 0.572289222696746, "learning_rate": 8.733594623239125e-06, "loss": 0.5474, "step": 3998 }, { "epoch": 0.51, "grad_norm": 0.7119114566450648, "learning_rate": 8.732908394815063e-06, "loss": 0.6178, "step": 3999 }, { "epoch": 0.51, "grad_norm": 0.7543084118528486, "learning_rate": 8.732222007493019e-06, "loss": 0.5496, "step": 4000 }, { "epoch": 0.51, "grad_norm": 0.6542244018315772, "learning_rate": 8.73153546130221e-06, "loss": 0.5878, "step": 4001 }, { "epoch": 0.51, "grad_norm": 0.7133576120049314, "learning_rate": 8.730848756271862e-06, "loss": 0.639, "step": 4002 }, { "epoch": 0.51, "grad_norm": 0.7365670830170392, "learning_rate": 8.730161892431204e-06, "loss": 0.5871, "step": 4003 }, { "epoch": 0.51, "grad_norm": 0.8981157433936465, "learning_rate": 8.729474869809474e-06, "loss": 0.6407, "step": 4004 }, { "epoch": 0.51, "grad_norm": 0.5689923401973566, "learning_rate": 8.728787688435916e-06, "loss": 0.5288, "step": 4005 }, { "epoch": 0.51, "grad_norm": 0.6219527165380645, "learning_rate": 8.728100348339783e-06, "loss": 0.5202, "step": 4006 }, { "epoch": 0.51, "grad_norm": 0.630844260409958, "learning_rate": 8.72741284955033e-06, "loss": 0.5255, "step": 4007 }, { "epoch": 0.51, "grad_norm": 0.7227527979511357, "learning_rate": 8.726725192096824e-06, "loss": 0.5761, "step": 4008 }, { "epoch": 0.51, "grad_norm": 0.6202240367694997, "learning_rate": 8.726037376008536e-06, "loss": 0.4882, "step": 4009 }, { "epoch": 0.51, "grad_norm": 0.6559508357445297, "learning_rate": 8.72534940131474e-06, "loss": 0.5337, "step": 4010 }, { "epoch": 0.51, "grad_norm": 0.6405642393842376, "learning_rate": 8.724661268044728e-06, "loss": 0.48, "step": 4011 }, { "epoch": 0.51, "grad_norm": 0.6600537617560803, "learning_rate": 8.723972976227788e-06, "loss": 0.5125, "step": 4012 }, { "epoch": 0.51, "grad_norm": 0.6226580315348801, "learning_rate": 8.723284525893219e-06, "loss": 0.5397, "step": 4013 }, { "epoch": 0.51, "grad_norm": 0.6788184469513424, "learning_rate": 8.722595917070324e-06, "loss": 0.5355, "step": 4014 }, { "epoch": 0.51, "grad_norm": 0.5946836383408967, "learning_rate": 8.721907149788416e-06, "loss": 0.5817, "step": 4015 }, { "epoch": 0.51, "grad_norm": 0.6621622427559274, "learning_rate": 8.721218224076815e-06, "loss": 0.5309, "step": 4016 }, { "epoch": 0.51, "grad_norm": 0.6623768717483649, "learning_rate": 8.720529139964846e-06, "loss": 0.5924, "step": 4017 }, { "epoch": 0.51, "grad_norm": 0.8514805777793002, "learning_rate": 8.71983989748184e-06, "loss": 0.62, "step": 4018 }, { "epoch": 0.51, "grad_norm": 0.744568234209917, "learning_rate": 8.719150496657138e-06, "loss": 0.6015, "step": 4019 }, { "epoch": 0.51, "grad_norm": 0.8172516781128346, "learning_rate": 8.718460937520083e-06, "loss": 0.6092, "step": 4020 }, { "epoch": 0.51, "grad_norm": 0.7186431297226278, "learning_rate": 8.717771220100028e-06, "loss": 0.6158, "step": 4021 }, { "epoch": 0.51, "grad_norm": 0.5596547073665259, "learning_rate": 8.717081344426332e-06, "loss": 0.5079, "step": 4022 }, { "epoch": 0.51, "grad_norm": 0.7277117309897767, "learning_rate": 8.716391310528362e-06, "loss": 0.6234, "step": 4023 }, { "epoch": 0.51, "grad_norm": 0.5581829383311955, "learning_rate": 8.71570111843549e-06, "loss": 0.532, "step": 4024 }, { "epoch": 0.51, "grad_norm": 0.8546352303153767, "learning_rate": 8.715010768177096e-06, "loss": 0.6008, "step": 4025 }, { "epoch": 0.51, "grad_norm": 0.6431617435741185, "learning_rate": 8.714320259782564e-06, "loss": 0.5458, "step": 4026 }, { "epoch": 0.51, "grad_norm": 0.871148495129938, "learning_rate": 8.713629593281287e-06, "loss": 0.6525, "step": 4027 }, { "epoch": 0.51, "grad_norm": 0.5651827233493597, "learning_rate": 8.712938768702667e-06, "loss": 0.5311, "step": 4028 }, { "epoch": 0.51, "grad_norm": 0.6910219990548543, "learning_rate": 8.712247786076107e-06, "loss": 0.5667, "step": 4029 }, { "epoch": 0.51, "grad_norm": 0.6234576595087924, "learning_rate": 8.711556645431023e-06, "loss": 0.5451, "step": 4030 }, { "epoch": 0.51, "grad_norm": 0.7584569646657977, "learning_rate": 8.710865346796831e-06, "loss": 0.6466, "step": 4031 }, { "epoch": 0.51, "grad_norm": 0.7033104790397724, "learning_rate": 8.71017389020296e-06, "loss": 0.6485, "step": 4032 }, { "epoch": 0.51, "grad_norm": 0.6413070074062487, "learning_rate": 8.709482275678844e-06, "loss": 0.5302, "step": 4033 }, { "epoch": 0.51, "grad_norm": 0.9181804849062695, "learning_rate": 8.70879050325392e-06, "loss": 0.6107, "step": 4034 }, { "epoch": 0.51, "grad_norm": 0.614806654088529, "learning_rate": 8.708098572957634e-06, "loss": 0.5341, "step": 4035 }, { "epoch": 0.51, "grad_norm": 0.6211892196371582, "learning_rate": 8.707406484819445e-06, "loss": 0.5617, "step": 4036 }, { "epoch": 0.51, "grad_norm": 0.7178186116677533, "learning_rate": 8.706714238868808e-06, "loss": 0.5269, "step": 4037 }, { "epoch": 0.51, "grad_norm": 0.667514713153214, "learning_rate": 8.70602183513519e-06, "loss": 0.54, "step": 4038 }, { "epoch": 0.51, "grad_norm": 0.7723771910555213, "learning_rate": 8.705329273648065e-06, "loss": 0.6125, "step": 4039 }, { "epoch": 0.51, "grad_norm": 0.7126742556076741, "learning_rate": 8.704636554436913e-06, "loss": 0.5893, "step": 4040 }, { "epoch": 0.51, "grad_norm": 0.8399445894924042, "learning_rate": 8.703943677531223e-06, "loss": 0.5503, "step": 4041 }, { "epoch": 0.51, "grad_norm": 0.8695449332268784, "learning_rate": 8.703250642960486e-06, "loss": 0.6168, "step": 4042 }, { "epoch": 0.52, "grad_norm": 0.7359725643449906, "learning_rate": 8.702557450754201e-06, "loss": 0.607, "step": 4043 }, { "epoch": 0.52, "grad_norm": 0.7472907414402942, "learning_rate": 8.701864100941879e-06, "loss": 0.5726, "step": 4044 }, { "epoch": 0.52, "grad_norm": 0.6833697075096975, "learning_rate": 8.701170593553032e-06, "loss": 0.5109, "step": 4045 }, { "epoch": 0.52, "grad_norm": 0.8930552361383712, "learning_rate": 8.70047692861718e-06, "loss": 0.6676, "step": 4046 }, { "epoch": 0.52, "grad_norm": 0.6422085922646437, "learning_rate": 8.69978310616385e-06, "loss": 0.5863, "step": 4047 }, { "epoch": 0.52, "grad_norm": 0.656960316599447, "learning_rate": 8.699089126222576e-06, "loss": 0.5375, "step": 4048 }, { "epoch": 0.52, "grad_norm": 0.676279498152905, "learning_rate": 8.698394988822898e-06, "loss": 0.5397, "step": 4049 }, { "epoch": 0.52, "grad_norm": 0.6276087711034353, "learning_rate": 8.697700693994363e-06, "loss": 0.518, "step": 4050 }, { "epoch": 0.52, "grad_norm": 0.6371970251125455, "learning_rate": 8.697006241766527e-06, "loss": 0.5037, "step": 4051 }, { "epoch": 0.52, "grad_norm": 0.6893087387319538, "learning_rate": 8.696311632168949e-06, "loss": 0.5757, "step": 4052 }, { "epoch": 0.52, "grad_norm": 0.6162191817085644, "learning_rate": 8.695616865231195e-06, "loss": 0.5383, "step": 4053 }, { "epoch": 0.52, "grad_norm": 0.7829475087524488, "learning_rate": 8.694921940982843e-06, "loss": 0.5308, "step": 4054 }, { "epoch": 0.52, "grad_norm": 0.8307525867404142, "learning_rate": 8.694226859453469e-06, "loss": 0.616, "step": 4055 }, { "epoch": 0.52, "grad_norm": 0.9620411113643442, "learning_rate": 8.693531620672664e-06, "loss": 0.6843, "step": 4056 }, { "epoch": 0.52, "grad_norm": 0.6578430396849646, "learning_rate": 8.69283622467002e-06, "loss": 0.5298, "step": 4057 }, { "epoch": 0.52, "grad_norm": 0.5854872539494609, "learning_rate": 8.69214067147514e-06, "loss": 0.5428, "step": 4058 }, { "epoch": 0.52, "grad_norm": 0.6192794312437374, "learning_rate": 8.69144496111763e-06, "loss": 0.5471, "step": 4059 }, { "epoch": 0.52, "grad_norm": 0.61017652142063, "learning_rate": 8.690749093627102e-06, "loss": 0.5551, "step": 4060 }, { "epoch": 0.52, "grad_norm": 0.7824772911012285, "learning_rate": 8.690053069033182e-06, "loss": 0.5582, "step": 4061 }, { "epoch": 0.52, "grad_norm": 0.6472949686667087, "learning_rate": 8.689356887365492e-06, "loss": 0.5841, "step": 4062 }, { "epoch": 0.52, "grad_norm": 0.7541421204474587, "learning_rate": 8.68866054865367e-06, "loss": 0.6478, "step": 4063 }, { "epoch": 0.52, "grad_norm": 0.5682369069903911, "learning_rate": 8.687964052927354e-06, "loss": 0.5286, "step": 4064 }, { "epoch": 0.52, "grad_norm": 0.843779305332011, "learning_rate": 8.687267400216195e-06, "loss": 0.6123, "step": 4065 }, { "epoch": 0.52, "grad_norm": 0.8020967312698006, "learning_rate": 8.686570590549845e-06, "loss": 0.6425, "step": 4066 }, { "epoch": 0.52, "grad_norm": 0.7308260065605556, "learning_rate": 8.685873623957966e-06, "loss": 0.6648, "step": 4067 }, { "epoch": 0.52, "grad_norm": 0.5821694481684088, "learning_rate": 8.685176500470225e-06, "loss": 0.5071, "step": 4068 }, { "epoch": 0.52, "grad_norm": 0.6087344874233439, "learning_rate": 8.684479220116296e-06, "loss": 0.5287, "step": 4069 }, { "epoch": 0.52, "grad_norm": 0.6595120773884432, "learning_rate": 8.683781782925861e-06, "loss": 0.5019, "step": 4070 }, { "epoch": 0.52, "grad_norm": 1.3103340262646546, "learning_rate": 8.683084188928608e-06, "loss": 0.6074, "step": 4071 }, { "epoch": 0.52, "grad_norm": 0.7099659840423111, "learning_rate": 8.68238643815423e-06, "loss": 0.6247, "step": 4072 }, { "epoch": 0.52, "grad_norm": 0.7628189058571455, "learning_rate": 8.681688530632429e-06, "loss": 0.6058, "step": 4073 }, { "epoch": 0.52, "grad_norm": 0.6555232246497514, "learning_rate": 8.680990466392912e-06, "loss": 0.5221, "step": 4074 }, { "epoch": 0.52, "grad_norm": 1.3898887696892253, "learning_rate": 8.680292245465392e-06, "loss": 0.5638, "step": 4075 }, { "epoch": 0.52, "grad_norm": 0.6140742381851138, "learning_rate": 8.679593867879592e-06, "loss": 0.5619, "step": 4076 }, { "epoch": 0.52, "grad_norm": 0.7637791651443466, "learning_rate": 8.67889533366524e-06, "loss": 0.6555, "step": 4077 }, { "epoch": 0.52, "grad_norm": 0.7524925106588084, "learning_rate": 8.67819664285207e-06, "loss": 0.6099, "step": 4078 }, { "epoch": 0.52, "grad_norm": 0.6666990954598578, "learning_rate": 8.677497795469823e-06, "loss": 0.603, "step": 4079 }, { "epoch": 0.52, "grad_norm": 0.7646750122609234, "learning_rate": 8.676798791548246e-06, "loss": 0.612, "step": 4080 }, { "epoch": 0.52, "grad_norm": 0.5656202450360042, "learning_rate": 8.676099631117094e-06, "loss": 0.5268, "step": 4081 }, { "epoch": 0.52, "grad_norm": 0.6034868943379306, "learning_rate": 8.675400314206126e-06, "loss": 0.5224, "step": 4082 }, { "epoch": 0.52, "grad_norm": 0.8118847928983691, "learning_rate": 8.674700840845114e-06, "loss": 0.6165, "step": 4083 }, { "epoch": 0.52, "grad_norm": 0.641151788610495, "learning_rate": 8.674001211063829e-06, "loss": 0.5163, "step": 4084 }, { "epoch": 0.52, "grad_norm": 0.6908073121201854, "learning_rate": 8.673301424892053e-06, "loss": 0.5475, "step": 4085 }, { "epoch": 0.52, "grad_norm": 0.6066042040860486, "learning_rate": 8.672601482359575e-06, "loss": 0.5278, "step": 4086 }, { "epoch": 0.52, "grad_norm": 0.6191794521350743, "learning_rate": 8.671901383496186e-06, "loss": 0.5292, "step": 4087 }, { "epoch": 0.52, "grad_norm": 0.7959269387054396, "learning_rate": 8.671201128331687e-06, "loss": 0.6259, "step": 4088 }, { "epoch": 0.52, "grad_norm": 0.8350898812561149, "learning_rate": 8.67050071689589e-06, "loss": 0.6722, "step": 4089 }, { "epoch": 0.52, "grad_norm": 0.7523124281825234, "learning_rate": 8.669800149218607e-06, "loss": 0.584, "step": 4090 }, { "epoch": 0.52, "grad_norm": 0.6330707856985351, "learning_rate": 8.66909942532966e-06, "loss": 0.5714, "step": 4091 }, { "epoch": 0.52, "grad_norm": 0.5500080185137677, "learning_rate": 8.668398545258872e-06, "loss": 0.4706, "step": 4092 }, { "epoch": 0.52, "grad_norm": 0.7440400781920494, "learning_rate": 8.667697509036081e-06, "loss": 0.5917, "step": 4093 }, { "epoch": 0.52, "grad_norm": 0.7571174341942223, "learning_rate": 8.666996316691129e-06, "loss": 0.594, "step": 4094 }, { "epoch": 0.52, "grad_norm": 0.5886212622941261, "learning_rate": 8.66629496825386e-06, "loss": 0.5436, "step": 4095 }, { "epoch": 0.52, "grad_norm": 0.8535177447626139, "learning_rate": 8.665593463754131e-06, "loss": 0.6544, "step": 4096 }, { "epoch": 0.52, "grad_norm": 0.6216790588125976, "learning_rate": 8.664891803221802e-06, "loss": 0.5401, "step": 4097 }, { "epoch": 0.52, "grad_norm": 0.6166756233687279, "learning_rate": 8.66418998668674e-06, "loss": 0.5657, "step": 4098 }, { "epoch": 0.52, "grad_norm": 0.6877739594834584, "learning_rate": 8.66348801417882e-06, "loss": 0.5408, "step": 4099 }, { "epoch": 0.52, "grad_norm": 0.6640935474929383, "learning_rate": 8.66278588572792e-06, "loss": 0.5717, "step": 4100 }, { "epoch": 0.52, "grad_norm": 0.7394274919281051, "learning_rate": 8.66208360136393e-06, "loss": 0.5924, "step": 4101 }, { "epoch": 0.52, "grad_norm": 0.7655150255633112, "learning_rate": 8.661381161116745e-06, "loss": 0.5503, "step": 4102 }, { "epoch": 0.52, "grad_norm": 0.6972852218878891, "learning_rate": 8.660678565016263e-06, "loss": 0.5819, "step": 4103 }, { "epoch": 0.52, "grad_norm": 0.7388092332104087, "learning_rate": 8.659975813092393e-06, "loss": 0.5306, "step": 4104 }, { "epoch": 0.52, "grad_norm": 0.7989478190168652, "learning_rate": 8.659272905375047e-06, "loss": 0.588, "step": 4105 }, { "epoch": 0.52, "grad_norm": 0.6038444889572419, "learning_rate": 8.658569841894148e-06, "loss": 0.5637, "step": 4106 }, { "epoch": 0.52, "grad_norm": 0.7578801116926419, "learning_rate": 8.657866622679623e-06, "loss": 0.5777, "step": 4107 }, { "epoch": 0.52, "grad_norm": 0.6616704042341236, "learning_rate": 8.657163247761402e-06, "loss": 0.5615, "step": 4108 }, { "epoch": 0.52, "grad_norm": 0.5650059545608774, "learning_rate": 8.656459717169429e-06, "loss": 0.4692, "step": 4109 }, { "epoch": 0.52, "grad_norm": 0.6599071762573367, "learning_rate": 8.65575603093365e-06, "loss": 0.4731, "step": 4110 }, { "epoch": 0.52, "grad_norm": 0.9157734941245574, "learning_rate": 8.655052189084022e-06, "loss": 0.6301, "step": 4111 }, { "epoch": 0.52, "grad_norm": 0.806251991510044, "learning_rate": 8.654348191650499e-06, "loss": 0.5543, "step": 4112 }, { "epoch": 0.52, "grad_norm": 1.4774434268320176, "learning_rate": 8.653644038663051e-06, "loss": 0.6115, "step": 4113 }, { "epoch": 0.52, "grad_norm": 0.5390721471166144, "learning_rate": 8.652939730151653e-06, "loss": 0.5035, "step": 4114 }, { "epoch": 0.52, "grad_norm": 0.8317143946613599, "learning_rate": 8.652235266146284e-06, "loss": 0.6595, "step": 4115 }, { "epoch": 0.52, "grad_norm": 0.7913826290662731, "learning_rate": 8.651530646676928e-06, "loss": 0.6217, "step": 4116 }, { "epoch": 0.52, "grad_norm": 0.654443711056819, "learning_rate": 8.650825871773586e-06, "loss": 0.5208, "step": 4117 }, { "epoch": 0.52, "grad_norm": 0.7653087047593622, "learning_rate": 8.650120941466248e-06, "loss": 0.6432, "step": 4118 }, { "epoch": 0.52, "grad_norm": 0.57040990562097, "learning_rate": 8.649415855784929e-06, "loss": 0.5459, "step": 4119 }, { "epoch": 0.52, "grad_norm": 0.6527354173215035, "learning_rate": 8.648710614759637e-06, "loss": 0.5758, "step": 4120 }, { "epoch": 0.53, "grad_norm": 0.8138528256104013, "learning_rate": 8.648005218420393e-06, "loss": 0.588, "step": 4121 }, { "epoch": 0.53, "grad_norm": 0.613926470552269, "learning_rate": 8.647299666797226e-06, "loss": 0.5711, "step": 4122 }, { "epoch": 0.53, "grad_norm": 0.7165810152569417, "learning_rate": 8.646593959920166e-06, "loss": 0.6382, "step": 4123 }, { "epoch": 0.53, "grad_norm": 0.8305220534314462, "learning_rate": 8.645888097819255e-06, "loss": 0.6302, "step": 4124 }, { "epoch": 0.53, "grad_norm": 0.610496215479003, "learning_rate": 8.64518208052454e-06, "loss": 0.5477, "step": 4125 }, { "epoch": 0.53, "grad_norm": 0.5918479460318838, "learning_rate": 8.644475908066067e-06, "loss": 0.5531, "step": 4126 }, { "epoch": 0.53, "grad_norm": 0.8880709961866834, "learning_rate": 8.643769580473905e-06, "loss": 0.6025, "step": 4127 }, { "epoch": 0.53, "grad_norm": 0.7413602925646209, "learning_rate": 8.643063097778115e-06, "loss": 0.6323, "step": 4128 }, { "epoch": 0.53, "grad_norm": 0.9621443936566458, "learning_rate": 8.642356460008769e-06, "loss": 0.6034, "step": 4129 }, { "epoch": 0.53, "grad_norm": 0.5911236851883683, "learning_rate": 8.641649667195951e-06, "loss": 0.5578, "step": 4130 }, { "epoch": 0.53, "grad_norm": 0.5623687100068212, "learning_rate": 8.640942719369741e-06, "loss": 0.5149, "step": 4131 }, { "epoch": 0.53, "grad_norm": 0.695412717064727, "learning_rate": 8.640235616560234e-06, "loss": 0.6133, "step": 4132 }, { "epoch": 0.53, "grad_norm": 0.9097797461029631, "learning_rate": 8.639528358797531e-06, "loss": 0.5955, "step": 4133 }, { "epoch": 0.53, "grad_norm": 0.7032956044712427, "learning_rate": 8.638820946111735e-06, "loss": 0.5135, "step": 4134 }, { "epoch": 0.53, "grad_norm": 0.6102219813605976, "learning_rate": 8.638113378532958e-06, "loss": 0.5143, "step": 4135 }, { "epoch": 0.53, "grad_norm": 0.8384911835924354, "learning_rate": 8.637405656091323e-06, "loss": 0.5976, "step": 4136 }, { "epoch": 0.53, "grad_norm": 0.9153487422463845, "learning_rate": 8.63669777881695e-06, "loss": 0.6568, "step": 4137 }, { "epoch": 0.53, "grad_norm": 0.5797435423856463, "learning_rate": 8.635989746739977e-06, "loss": 0.509, "step": 4138 }, { "epoch": 0.53, "grad_norm": 0.6914392041462173, "learning_rate": 8.635281559890536e-06, "loss": 0.5441, "step": 4139 }, { "epoch": 0.53, "grad_norm": 0.7410179945760179, "learning_rate": 8.634573218298778e-06, "loss": 0.5199, "step": 4140 }, { "epoch": 0.53, "grad_norm": 0.6525900131655286, "learning_rate": 8.633864721994852e-06, "loss": 0.5494, "step": 4141 }, { "epoch": 0.53, "grad_norm": 0.5900897350075014, "learning_rate": 8.633156071008917e-06, "loss": 0.5399, "step": 4142 }, { "epoch": 0.53, "grad_norm": 0.8346246161729853, "learning_rate": 8.632447265371137e-06, "loss": 0.595, "step": 4143 }, { "epoch": 0.53, "grad_norm": 0.9441855549004021, "learning_rate": 8.631738305111686e-06, "loss": 0.6699, "step": 4144 }, { "epoch": 0.53, "grad_norm": 0.6496769937815052, "learning_rate": 8.63102919026074e-06, "loss": 0.5157, "step": 4145 }, { "epoch": 0.53, "grad_norm": 0.6931401396567558, "learning_rate": 8.630319920848485e-06, "loss": 0.5379, "step": 4146 }, { "epoch": 0.53, "grad_norm": 0.650106143481835, "learning_rate": 8.62961049690511e-06, "loss": 0.587, "step": 4147 }, { "epoch": 0.53, "grad_norm": 0.5441884620634613, "learning_rate": 8.628900918460816e-06, "loss": 0.5076, "step": 4148 }, { "epoch": 0.53, "grad_norm": 0.791754450685671, "learning_rate": 8.628191185545806e-06, "loss": 0.5994, "step": 4149 }, { "epoch": 0.53, "grad_norm": 0.7597955361054618, "learning_rate": 8.627481298190292e-06, "loss": 0.6826, "step": 4150 }, { "epoch": 0.53, "grad_norm": 0.8251425342384988, "learning_rate": 8.62677125642449e-06, "loss": 0.6157, "step": 4151 }, { "epoch": 0.53, "grad_norm": 0.650528724218808, "learning_rate": 8.626061060278627e-06, "loss": 0.555, "step": 4152 }, { "epoch": 0.53, "grad_norm": 0.7189530729765402, "learning_rate": 8.62535070978293e-06, "loss": 0.5655, "step": 4153 }, { "epoch": 0.53, "grad_norm": 0.7606499760756236, "learning_rate": 8.62464020496764e-06, "loss": 0.6268, "step": 4154 }, { "epoch": 0.53, "grad_norm": 0.5732538416130583, "learning_rate": 8.623929545862997e-06, "loss": 0.4855, "step": 4155 }, { "epoch": 0.53, "grad_norm": 0.7301025580970828, "learning_rate": 8.623218732499254e-06, "loss": 0.6045, "step": 4156 }, { "epoch": 0.53, "grad_norm": 0.7152390100076712, "learning_rate": 8.622507764906669e-06, "loss": 0.5478, "step": 4157 }, { "epoch": 0.53, "grad_norm": 0.7711394971385065, "learning_rate": 8.621796643115503e-06, "loss": 0.6485, "step": 4158 }, { "epoch": 0.53, "grad_norm": 0.693863683938519, "learning_rate": 8.621085367156028e-06, "loss": 0.4978, "step": 4159 }, { "epoch": 0.53, "grad_norm": 0.7262533239298832, "learning_rate": 8.62037393705852e-06, "loss": 0.6006, "step": 4160 }, { "epoch": 0.53, "grad_norm": 0.7916697470361381, "learning_rate": 8.619662352853264e-06, "loss": 0.6171, "step": 4161 }, { "epoch": 0.53, "grad_norm": 0.6121833954315268, "learning_rate": 8.618950614570547e-06, "loss": 0.5233, "step": 4162 }, { "epoch": 0.53, "grad_norm": 0.5597290368600105, "learning_rate": 8.618238722240668e-06, "loss": 0.4886, "step": 4163 }, { "epoch": 0.53, "grad_norm": 0.7417499369103104, "learning_rate": 8.617526675893928e-06, "loss": 0.6362, "step": 4164 }, { "epoch": 0.53, "grad_norm": 0.9988250437935274, "learning_rate": 8.616814475560636e-06, "loss": 0.6653, "step": 4165 }, { "epoch": 0.53, "grad_norm": 0.5158037139850548, "learning_rate": 8.61610212127111e-06, "loss": 0.4958, "step": 4166 }, { "epoch": 0.53, "grad_norm": 0.7505200182807563, "learning_rate": 8.615389613055673e-06, "loss": 0.6192, "step": 4167 }, { "epoch": 0.53, "grad_norm": 0.6833847468301292, "learning_rate": 8.614676950944654e-06, "loss": 0.6084, "step": 4168 }, { "epoch": 0.53, "grad_norm": 0.7950884612137535, "learning_rate": 8.613964134968387e-06, "loss": 0.6446, "step": 4169 }, { "epoch": 0.53, "grad_norm": 0.760316459249358, "learning_rate": 8.613251165157217e-06, "loss": 0.6126, "step": 4170 }, { "epoch": 0.53, "grad_norm": 0.7438221263805592, "learning_rate": 8.612538041541489e-06, "loss": 0.6419, "step": 4171 }, { "epoch": 0.53, "grad_norm": 0.6463356886247231, "learning_rate": 8.611824764151561e-06, "loss": 0.5119, "step": 4172 }, { "epoch": 0.53, "grad_norm": 0.6826826851340493, "learning_rate": 8.611111333017796e-06, "loss": 0.6654, "step": 4173 }, { "epoch": 0.53, "grad_norm": 0.7182546328168065, "learning_rate": 8.61039774817056e-06, "loss": 0.5875, "step": 4174 }, { "epoch": 0.53, "grad_norm": 0.6464219189713521, "learning_rate": 8.609684009640229e-06, "loss": 0.5131, "step": 4175 }, { "epoch": 0.53, "grad_norm": 0.8153351876063932, "learning_rate": 8.608970117457184e-06, "loss": 0.6086, "step": 4176 }, { "epoch": 0.53, "grad_norm": 0.7910516754668436, "learning_rate": 8.608256071651815e-06, "loss": 0.6165, "step": 4177 }, { "epoch": 0.53, "grad_norm": 0.656802740069538, "learning_rate": 8.607541872254514e-06, "loss": 0.5108, "step": 4178 }, { "epoch": 0.53, "grad_norm": 0.6371077279264837, "learning_rate": 8.606827519295684e-06, "loss": 0.5285, "step": 4179 }, { "epoch": 0.53, "grad_norm": 0.5539188929778869, "learning_rate": 8.606113012805733e-06, "loss": 0.5101, "step": 4180 }, { "epoch": 0.53, "grad_norm": 0.6357198748650659, "learning_rate": 8.605398352815074e-06, "loss": 0.5168, "step": 4181 }, { "epoch": 0.53, "grad_norm": 0.7508241451132187, "learning_rate": 8.604683539354129e-06, "loss": 0.5726, "step": 4182 }, { "epoch": 0.53, "grad_norm": 0.6215320432201875, "learning_rate": 8.603968572453324e-06, "loss": 0.5758, "step": 4183 }, { "epoch": 0.53, "grad_norm": 0.8068590590914627, "learning_rate": 8.603253452143093e-06, "loss": 0.6111, "step": 4184 }, { "epoch": 0.53, "grad_norm": 0.7141856617069584, "learning_rate": 8.602538178453877e-06, "loss": 0.5656, "step": 4185 }, { "epoch": 0.53, "grad_norm": 0.5908150107095695, "learning_rate": 8.601822751416124e-06, "loss": 0.5656, "step": 4186 }, { "epoch": 0.53, "grad_norm": 0.7204489416727751, "learning_rate": 8.601107171060286e-06, "loss": 0.5973, "step": 4187 }, { "epoch": 0.53, "grad_norm": 0.5675320397907174, "learning_rate": 8.600391437416822e-06, "loss": 0.5187, "step": 4188 }, { "epoch": 0.53, "grad_norm": 0.6409800876569068, "learning_rate": 8.599675550516201e-06, "loss": 0.5155, "step": 4189 }, { "epoch": 0.53, "grad_norm": 0.5860737309056203, "learning_rate": 8.598959510388894e-06, "loss": 0.5359, "step": 4190 }, { "epoch": 0.53, "grad_norm": 0.9123702174969488, "learning_rate": 8.598243317065382e-06, "loss": 0.6837, "step": 4191 }, { "epoch": 0.53, "grad_norm": 0.8262264583018809, "learning_rate": 8.597526970576148e-06, "loss": 0.6649, "step": 4192 }, { "epoch": 0.53, "grad_norm": 0.6748319481331477, "learning_rate": 8.59681047095169e-06, "loss": 0.595, "step": 4193 }, { "epoch": 0.53, "grad_norm": 0.676612574805558, "learning_rate": 8.596093818222504e-06, "loss": 0.5552, "step": 4194 }, { "epoch": 0.53, "grad_norm": 0.823835763345746, "learning_rate": 8.595377012419093e-06, "loss": 0.6093, "step": 4195 }, { "epoch": 0.53, "grad_norm": 0.8484589908110719, "learning_rate": 8.594660053571972e-06, "loss": 0.6066, "step": 4196 }, { "epoch": 0.53, "grad_norm": 0.7217782534743693, "learning_rate": 8.593942941711662e-06, "loss": 0.6151, "step": 4197 }, { "epoch": 0.53, "grad_norm": 0.5632304908788441, "learning_rate": 8.593225676868685e-06, "loss": 0.5254, "step": 4198 }, { "epoch": 0.53, "grad_norm": 0.6662700969602309, "learning_rate": 8.592508259073571e-06, "loss": 0.5316, "step": 4199 }, { "epoch": 0.54, "grad_norm": 0.737610740156497, "learning_rate": 8.591790688356863e-06, "loss": 0.5141, "step": 4200 }, { "epoch": 0.54, "grad_norm": 0.7547099648272309, "learning_rate": 8.5910729647491e-06, "loss": 0.5791, "step": 4201 }, { "epoch": 0.54, "grad_norm": 0.6084062881886371, "learning_rate": 8.59035508828084e-06, "loss": 0.5719, "step": 4202 }, { "epoch": 0.54, "grad_norm": 0.5751780275211879, "learning_rate": 8.589637058982634e-06, "loss": 0.5317, "step": 4203 }, { "epoch": 0.54, "grad_norm": 0.5713846862244606, "learning_rate": 8.58891887688505e-06, "loss": 0.5295, "step": 4204 }, { "epoch": 0.54, "grad_norm": 0.8001175862986285, "learning_rate": 8.588200542018658e-06, "loss": 0.5774, "step": 4205 }, { "epoch": 0.54, "grad_norm": 0.6770304915731522, "learning_rate": 8.587482054414036e-06, "loss": 0.5484, "step": 4206 }, { "epoch": 0.54, "grad_norm": 0.7087752401044223, "learning_rate": 8.586763414101766e-06, "loss": 0.5347, "step": 4207 }, { "epoch": 0.54, "grad_norm": 0.7071601500404803, "learning_rate": 8.586044621112442e-06, "loss": 0.5589, "step": 4208 }, { "epoch": 0.54, "grad_norm": 0.8252986939014532, "learning_rate": 8.585325675476655e-06, "loss": 0.5873, "step": 4209 }, { "epoch": 0.54, "grad_norm": 0.6291140667432583, "learning_rate": 8.58460657722501e-06, "loss": 0.5605, "step": 4210 }, { "epoch": 0.54, "grad_norm": 0.5163097025465593, "learning_rate": 8.58388732638812e-06, "loss": 0.5118, "step": 4211 }, { "epoch": 0.54, "grad_norm": 0.8195100101489301, "learning_rate": 8.583167922996598e-06, "loss": 0.5969, "step": 4212 }, { "epoch": 0.54, "grad_norm": 0.6322556113774183, "learning_rate": 8.58244836708107e-06, "loss": 0.617, "step": 4213 }, { "epoch": 0.54, "grad_norm": 0.587528861532704, "learning_rate": 8.581728658672159e-06, "loss": 0.4921, "step": 4214 }, { "epoch": 0.54, "grad_norm": 0.5784900038882096, "learning_rate": 8.581008797800506e-06, "loss": 0.524, "step": 4215 }, { "epoch": 0.54, "grad_norm": 0.6700150476268066, "learning_rate": 8.580288784496752e-06, "loss": 0.5758, "step": 4216 }, { "epoch": 0.54, "grad_norm": 0.5539846050468967, "learning_rate": 8.579568618791547e-06, "loss": 0.4944, "step": 4217 }, { "epoch": 0.54, "grad_norm": 0.7533418118582415, "learning_rate": 8.578848300715542e-06, "loss": 0.5857, "step": 4218 }, { "epoch": 0.54, "grad_norm": 0.5890395111804739, "learning_rate": 8.578127830299404e-06, "loss": 0.5226, "step": 4219 }, { "epoch": 0.54, "grad_norm": 0.6189759451509745, "learning_rate": 8.577407207573795e-06, "loss": 0.5422, "step": 4220 }, { "epoch": 0.54, "grad_norm": 0.7313971748589089, "learning_rate": 8.576686432569394e-06, "loss": 0.5291, "step": 4221 }, { "epoch": 0.54, "grad_norm": 0.6457195678984613, "learning_rate": 8.575965505316883e-06, "loss": 0.6376, "step": 4222 }, { "epoch": 0.54, "grad_norm": 0.6017810013002097, "learning_rate": 8.575244425846945e-06, "loss": 0.522, "step": 4223 }, { "epoch": 0.54, "grad_norm": 0.665085071496339, "learning_rate": 8.574523194190279e-06, "loss": 0.5689, "step": 4224 }, { "epoch": 0.54, "grad_norm": 0.5874504662682642, "learning_rate": 8.573801810377581e-06, "loss": 0.5589, "step": 4225 }, { "epoch": 0.54, "grad_norm": 0.7687996514174105, "learning_rate": 8.573080274439562e-06, "loss": 0.5539, "step": 4226 }, { "epoch": 0.54, "grad_norm": 1.11924188433432, "learning_rate": 8.572358586406933e-06, "loss": 0.6066, "step": 4227 }, { "epoch": 0.54, "grad_norm": 0.6729692267052756, "learning_rate": 8.571636746310414e-06, "loss": 0.4876, "step": 4228 }, { "epoch": 0.54, "grad_norm": 0.666474001354608, "learning_rate": 8.570914754180733e-06, "loss": 0.5208, "step": 4229 }, { "epoch": 0.54, "grad_norm": 0.7473142683705336, "learning_rate": 8.57019261004862e-06, "loss": 0.5898, "step": 4230 }, { "epoch": 0.54, "grad_norm": 0.6328917397470593, "learning_rate": 8.56947031394482e-06, "loss": 0.5285, "step": 4231 }, { "epoch": 0.54, "grad_norm": 0.7986635809428788, "learning_rate": 8.568747865900071e-06, "loss": 0.5585, "step": 4232 }, { "epoch": 0.54, "grad_norm": 0.7868504692724377, "learning_rate": 8.56802526594513e-06, "loss": 0.5947, "step": 4233 }, { "epoch": 0.54, "grad_norm": 0.6468800138600855, "learning_rate": 8.567302514110758e-06, "loss": 0.5143, "step": 4234 }, { "epoch": 0.54, "grad_norm": 0.7471634788203108, "learning_rate": 8.566579610427715e-06, "loss": 0.5681, "step": 4235 }, { "epoch": 0.54, "grad_norm": 0.6310562666237889, "learning_rate": 8.565856554926776e-06, "loss": 0.5338, "step": 4236 }, { "epoch": 0.54, "grad_norm": 0.6684379571259103, "learning_rate": 8.565133347638719e-06, "loss": 0.5026, "step": 4237 }, { "epoch": 0.54, "grad_norm": 0.7049264857095631, "learning_rate": 8.564409988594327e-06, "loss": 0.5413, "step": 4238 }, { "epoch": 0.54, "grad_norm": 0.6182126875094807, "learning_rate": 8.563686477824392e-06, "loss": 0.5407, "step": 4239 }, { "epoch": 0.54, "grad_norm": 0.7842127618823762, "learning_rate": 8.562962815359712e-06, "loss": 0.6675, "step": 4240 }, { "epoch": 0.54, "grad_norm": 0.7562142014068194, "learning_rate": 8.56223900123109e-06, "loss": 0.6016, "step": 4241 }, { "epoch": 0.54, "grad_norm": 0.880586079244348, "learning_rate": 8.561515035469337e-06, "loss": 0.6347, "step": 4242 }, { "epoch": 0.54, "grad_norm": 0.7206524348861861, "learning_rate": 8.56079091810527e-06, "loss": 0.5485, "step": 4243 }, { "epoch": 0.54, "grad_norm": 0.6043269227553943, "learning_rate": 8.560066649169713e-06, "loss": 0.546, "step": 4244 }, { "epoch": 0.54, "grad_norm": 0.7613203813613829, "learning_rate": 8.559342228693494e-06, "loss": 0.6181, "step": 4245 }, { "epoch": 0.54, "grad_norm": 0.7318250387236054, "learning_rate": 8.558617656707452e-06, "loss": 0.5911, "step": 4246 }, { "epoch": 0.54, "grad_norm": 0.7224439786834228, "learning_rate": 8.557892933242427e-06, "loss": 0.6229, "step": 4247 }, { "epoch": 0.54, "grad_norm": 0.7243203917415874, "learning_rate": 8.55716805832927e-06, "loss": 0.5974, "step": 4248 }, { "epoch": 0.54, "grad_norm": 0.626188156046898, "learning_rate": 8.556443031998834e-06, "loss": 0.528, "step": 4249 }, { "epoch": 0.54, "grad_norm": 0.7931637331244187, "learning_rate": 8.555717854281984e-06, "loss": 0.6025, "step": 4250 }, { "epoch": 0.54, "grad_norm": 0.6933561182954645, "learning_rate": 8.554992525209589e-06, "loss": 0.5412, "step": 4251 }, { "epoch": 0.54, "grad_norm": 0.7755683091433357, "learning_rate": 8.55426704481252e-06, "loss": 0.6288, "step": 4252 }, { "epoch": 0.54, "grad_norm": 0.585608131688245, "learning_rate": 8.55354141312166e-06, "loss": 0.5274, "step": 4253 }, { "epoch": 0.54, "grad_norm": 0.5471904085494139, "learning_rate": 8.552815630167901e-06, "loss": 0.5133, "step": 4254 }, { "epoch": 0.54, "grad_norm": 0.6381377826857912, "learning_rate": 8.552089695982132e-06, "loss": 0.5194, "step": 4255 }, { "epoch": 0.54, "grad_norm": 0.5628675477002059, "learning_rate": 8.551363610595253e-06, "loss": 0.4989, "step": 4256 }, { "epoch": 0.54, "grad_norm": 0.6824626597554021, "learning_rate": 8.550637374038178e-06, "loss": 0.572, "step": 4257 }, { "epoch": 0.54, "grad_norm": 0.757329896163903, "learning_rate": 8.549910986341813e-06, "loss": 0.5717, "step": 4258 }, { "epoch": 0.54, "grad_norm": 0.6578028395022709, "learning_rate": 8.549184447537082e-06, "loss": 0.576, "step": 4259 }, { "epoch": 0.54, "grad_norm": 0.5724289711452756, "learning_rate": 8.548457757654912e-06, "loss": 0.515, "step": 4260 }, { "epoch": 0.54, "grad_norm": 0.5910746454046204, "learning_rate": 8.547730916726234e-06, "loss": 0.5374, "step": 4261 }, { "epoch": 0.54, "grad_norm": 0.944792849147322, "learning_rate": 8.547003924781988e-06, "loss": 0.6537, "step": 4262 }, { "epoch": 0.54, "grad_norm": 0.5978913596841029, "learning_rate": 8.546276781853117e-06, "loss": 0.5498, "step": 4263 }, { "epoch": 0.54, "grad_norm": 0.5862549759040205, "learning_rate": 8.545549487970577e-06, "loss": 0.5268, "step": 4264 }, { "epoch": 0.54, "grad_norm": 0.7393124458187841, "learning_rate": 8.544822043165326e-06, "loss": 0.5998, "step": 4265 }, { "epoch": 0.54, "grad_norm": 0.660501968995592, "learning_rate": 8.544094447468328e-06, "loss": 0.594, "step": 4266 }, { "epoch": 0.54, "grad_norm": 0.694251272036348, "learning_rate": 8.543366700910555e-06, "loss": 0.5976, "step": 4267 }, { "epoch": 0.54, "grad_norm": 0.7277816593245259, "learning_rate": 8.542638803522985e-06, "loss": 0.5914, "step": 4268 }, { "epoch": 0.54, "grad_norm": 0.7565444795806341, "learning_rate": 8.5419107553366e-06, "loss": 0.6361, "step": 4269 }, { "epoch": 0.54, "grad_norm": 0.8124572372090746, "learning_rate": 8.541182556382393e-06, "loss": 0.6094, "step": 4270 }, { "epoch": 0.54, "grad_norm": 0.5984985772708591, "learning_rate": 8.540454206691362e-06, "loss": 0.5356, "step": 4271 }, { "epoch": 0.54, "grad_norm": 0.6298035543755568, "learning_rate": 8.539725706294509e-06, "loss": 0.5158, "step": 4272 }, { "epoch": 0.54, "grad_norm": 0.8228448548212153, "learning_rate": 8.538997055222842e-06, "loss": 0.5704, "step": 4273 }, { "epoch": 0.54, "grad_norm": 0.8257302554424572, "learning_rate": 8.53826825350738e-06, "loss": 0.6344, "step": 4274 }, { "epoch": 0.54, "grad_norm": 0.7226723426272715, "learning_rate": 8.537539301179145e-06, "loss": 0.6658, "step": 4275 }, { "epoch": 0.54, "grad_norm": 0.8743980343808205, "learning_rate": 8.536810198269168e-06, "loss": 0.5995, "step": 4276 }, { "epoch": 0.54, "grad_norm": 0.7517533146836193, "learning_rate": 8.536080944808483e-06, "loss": 0.6374, "step": 4277 }, { "epoch": 0.55, "grad_norm": 0.6061560152620385, "learning_rate": 8.53535154082813e-06, "loss": 0.5455, "step": 4278 }, { "epoch": 0.55, "grad_norm": 0.6457503016339637, "learning_rate": 8.53462198635916e-06, "loss": 0.5165, "step": 4279 }, { "epoch": 0.55, "grad_norm": 0.6406034037273896, "learning_rate": 8.533892281432627e-06, "loss": 0.5416, "step": 4280 }, { "epoch": 0.55, "grad_norm": 0.6012821386061982, "learning_rate": 8.533162426079591e-06, "loss": 0.5397, "step": 4281 }, { "epoch": 0.55, "grad_norm": 0.6000210809009845, "learning_rate": 8.532432420331122e-06, "loss": 0.5231, "step": 4282 }, { "epoch": 0.55, "grad_norm": 0.8292004198863734, "learning_rate": 8.531702264218295e-06, "loss": 0.6071, "step": 4283 }, { "epoch": 0.55, "grad_norm": 0.8269181989750112, "learning_rate": 8.530971957772186e-06, "loss": 0.6241, "step": 4284 }, { "epoch": 0.55, "grad_norm": 0.7960326046520969, "learning_rate": 8.530241501023885e-06, "loss": 0.673, "step": 4285 }, { "epoch": 0.55, "grad_norm": 0.6209979527108801, "learning_rate": 8.529510894004484e-06, "loss": 0.5193, "step": 4286 }, { "epoch": 0.55, "grad_norm": 0.8199632399794136, "learning_rate": 8.528780136745083e-06, "loss": 0.5951, "step": 4287 }, { "epoch": 0.55, "grad_norm": 0.7524330032267278, "learning_rate": 8.528049229276787e-06, "loss": 0.6162, "step": 4288 }, { "epoch": 0.55, "grad_norm": 0.7915587553085761, "learning_rate": 8.52731817163071e-06, "loss": 0.6037, "step": 4289 }, { "epoch": 0.55, "grad_norm": 0.8825922227382288, "learning_rate": 8.52658696383797e-06, "loss": 0.6703, "step": 4290 }, { "epoch": 0.55, "grad_norm": 0.8712034370811429, "learning_rate": 8.525855605929694e-06, "loss": 0.5956, "step": 4291 }, { "epoch": 0.55, "grad_norm": 0.7057093369300327, "learning_rate": 8.52512409793701e-06, "loss": 0.4895, "step": 4292 }, { "epoch": 0.55, "grad_norm": 0.7541501445141401, "learning_rate": 8.524392439891058e-06, "loss": 0.6444, "step": 4293 }, { "epoch": 0.55, "grad_norm": 0.7671029700881294, "learning_rate": 8.523660631822983e-06, "loss": 0.5962, "step": 4294 }, { "epoch": 0.55, "grad_norm": 1.0784969699630877, "learning_rate": 8.522928673763934e-06, "loss": 0.6503, "step": 4295 }, { "epoch": 0.55, "grad_norm": 0.5629563927965483, "learning_rate": 8.52219656574507e-06, "loss": 0.4987, "step": 4296 }, { "epoch": 0.55, "grad_norm": 0.9040638419303705, "learning_rate": 8.521464307797553e-06, "loss": 0.6195, "step": 4297 }, { "epoch": 0.55, "grad_norm": 0.5859049259778425, "learning_rate": 8.520731899952553e-06, "loss": 0.4963, "step": 4298 }, { "epoch": 0.55, "grad_norm": 0.8656947498642173, "learning_rate": 8.519999342241247e-06, "loss": 0.5696, "step": 4299 }, { "epoch": 0.55, "grad_norm": 0.7548629521197194, "learning_rate": 8.519266634694817e-06, "loss": 0.6493, "step": 4300 }, { "epoch": 0.55, "grad_norm": 0.5617877931928973, "learning_rate": 8.518533777344453e-06, "loss": 0.4903, "step": 4301 }, { "epoch": 0.55, "grad_norm": 0.8194925530236241, "learning_rate": 8.51780077022135e-06, "loss": 0.6523, "step": 4302 }, { "epoch": 0.55, "grad_norm": 0.7949077097631095, "learning_rate": 8.517067613356708e-06, "loss": 0.6174, "step": 4303 }, { "epoch": 0.55, "grad_norm": 0.6836000824920909, "learning_rate": 8.516334306781737e-06, "loss": 0.4949, "step": 4304 }, { "epoch": 0.55, "grad_norm": 0.6349075682784242, "learning_rate": 8.515600850527653e-06, "loss": 0.4851, "step": 4305 }, { "epoch": 0.55, "grad_norm": 0.6277272316069593, "learning_rate": 8.514867244625673e-06, "loss": 0.5361, "step": 4306 }, { "epoch": 0.55, "grad_norm": 0.7849166812396969, "learning_rate": 8.514133489107028e-06, "loss": 0.6509, "step": 4307 }, { "epoch": 0.55, "grad_norm": 0.7560839783702589, "learning_rate": 8.513399584002948e-06, "loss": 0.6344, "step": 4308 }, { "epoch": 0.55, "grad_norm": 0.5989992762340549, "learning_rate": 8.512665529344675e-06, "loss": 0.5297, "step": 4309 }, { "epoch": 0.55, "grad_norm": 0.7960445312315848, "learning_rate": 8.511931325163457e-06, "loss": 0.6894, "step": 4310 }, { "epoch": 0.55, "grad_norm": 0.6863474292387314, "learning_rate": 8.511196971490546e-06, "loss": 0.5417, "step": 4311 }, { "epoch": 0.55, "grad_norm": 0.7034927102973783, "learning_rate": 8.510462468357197e-06, "loss": 0.5478, "step": 4312 }, { "epoch": 0.55, "grad_norm": 0.7326193985658955, "learning_rate": 8.50972781579468e-06, "loss": 0.643, "step": 4313 }, { "epoch": 0.55, "grad_norm": 0.630221023403941, "learning_rate": 8.508993013834266e-06, "loss": 0.5863, "step": 4314 }, { "epoch": 0.55, "grad_norm": 0.854629603371839, "learning_rate": 8.508258062507231e-06, "loss": 0.7465, "step": 4315 }, { "epoch": 0.55, "grad_norm": 0.744227787448643, "learning_rate": 8.507522961844861e-06, "loss": 0.6361, "step": 4316 }, { "epoch": 0.55, "grad_norm": 0.8040693037841271, "learning_rate": 8.50678771187845e-06, "loss": 0.6694, "step": 4317 }, { "epoch": 0.55, "grad_norm": 0.8317219420549856, "learning_rate": 8.506052312639288e-06, "loss": 0.6361, "step": 4318 }, { "epoch": 0.55, "grad_norm": 0.6828791795082905, "learning_rate": 8.505316764158685e-06, "loss": 0.5857, "step": 4319 }, { "epoch": 0.55, "grad_norm": 0.6833364312519926, "learning_rate": 8.504581066467949e-06, "loss": 0.6378, "step": 4320 }, { "epoch": 0.55, "grad_norm": 0.604475259089991, "learning_rate": 8.503845219598395e-06, "loss": 0.5891, "step": 4321 }, { "epoch": 0.55, "grad_norm": 0.8001010618198438, "learning_rate": 8.503109223581348e-06, "loss": 0.6729, "step": 4322 }, { "epoch": 0.55, "grad_norm": 0.7029522662113565, "learning_rate": 8.502373078448136e-06, "loss": 0.5837, "step": 4323 }, { "epoch": 0.55, "grad_norm": 0.6558529097763018, "learning_rate": 8.501636784230091e-06, "loss": 0.6184, "step": 4324 }, { "epoch": 0.55, "grad_norm": 0.7121771072838233, "learning_rate": 8.50090034095856e-06, "loss": 0.5648, "step": 4325 }, { "epoch": 0.55, "grad_norm": 0.7918856170257168, "learning_rate": 8.50016374866489e-06, "loss": 0.6181, "step": 4326 }, { "epoch": 0.55, "grad_norm": 0.744418829081397, "learning_rate": 8.499427007380433e-06, "loss": 0.6777, "step": 4327 }, { "epoch": 0.55, "grad_norm": 0.8242404096462485, "learning_rate": 8.49869011713655e-06, "loss": 0.5594, "step": 4328 }, { "epoch": 0.55, "grad_norm": 0.8580536800368124, "learning_rate": 8.49795307796461e-06, "loss": 0.6931, "step": 4329 }, { "epoch": 0.55, "grad_norm": 0.757476645322607, "learning_rate": 8.497215889895984e-06, "loss": 0.6423, "step": 4330 }, { "epoch": 0.55, "grad_norm": 0.5462465331305486, "learning_rate": 8.496478552962053e-06, "loss": 0.557, "step": 4331 }, { "epoch": 0.55, "grad_norm": 0.7154005967583038, "learning_rate": 8.495741067194203e-06, "loss": 0.6269, "step": 4332 }, { "epoch": 0.55, "grad_norm": 0.7064131698022073, "learning_rate": 8.495003432623828e-06, "loss": 0.6145, "step": 4333 }, { "epoch": 0.55, "grad_norm": 0.578074893201352, "learning_rate": 8.494265649282325e-06, "loss": 0.5458, "step": 4334 }, { "epoch": 0.55, "grad_norm": 0.5167112198028618, "learning_rate": 8.4935277172011e-06, "loss": 0.5034, "step": 4335 }, { "epoch": 0.55, "grad_norm": 0.7197536131515607, "learning_rate": 8.492789636411562e-06, "loss": 0.6008, "step": 4336 }, { "epoch": 0.55, "grad_norm": 0.8414455711739971, "learning_rate": 8.492051406945132e-06, "loss": 0.6627, "step": 4337 }, { "epoch": 0.55, "grad_norm": 0.8546785748275249, "learning_rate": 8.491313028833232e-06, "loss": 0.692, "step": 4338 }, { "epoch": 0.55, "grad_norm": 0.5943690480985239, "learning_rate": 8.490574502107294e-06, "loss": 0.5173, "step": 4339 }, { "epoch": 0.55, "grad_norm": 0.7680057826779969, "learning_rate": 8.489835826798752e-06, "loss": 0.6163, "step": 4340 }, { "epoch": 0.55, "grad_norm": 0.5888222986238567, "learning_rate": 8.489097002939052e-06, "loss": 0.504, "step": 4341 }, { "epoch": 0.55, "grad_norm": 0.6894175108984276, "learning_rate": 8.488358030559643e-06, "loss": 0.6367, "step": 4342 }, { "epoch": 0.55, "grad_norm": 0.740876167443951, "learning_rate": 8.487618909691977e-06, "loss": 0.6399, "step": 4343 }, { "epoch": 0.55, "grad_norm": 0.7171090480709094, "learning_rate": 8.48687964036752e-06, "loss": 0.5975, "step": 4344 }, { "epoch": 0.55, "grad_norm": 0.7724993531563258, "learning_rate": 8.48614022261774e-06, "loss": 0.6287, "step": 4345 }, { "epoch": 0.55, "grad_norm": 0.7967773280677185, "learning_rate": 8.48540065647411e-06, "loss": 0.5835, "step": 4346 }, { "epoch": 0.55, "grad_norm": 0.6120062632400328, "learning_rate": 8.484660941968113e-06, "loss": 0.5514, "step": 4347 }, { "epoch": 0.55, "grad_norm": 0.6610184084121873, "learning_rate": 8.483921079131233e-06, "loss": 0.5538, "step": 4348 }, { "epoch": 0.55, "grad_norm": 0.6147261331619809, "learning_rate": 8.48318106799497e-06, "loss": 0.4788, "step": 4349 }, { "epoch": 0.55, "grad_norm": 0.7151182122048538, "learning_rate": 8.482440908590816e-06, "loss": 0.571, "step": 4350 }, { "epoch": 0.55, "grad_norm": 0.7994778922305898, "learning_rate": 8.48170060095028e-06, "loss": 0.5999, "step": 4351 }, { "epoch": 0.55, "grad_norm": 0.9544782281327197, "learning_rate": 8.480960145104877e-06, "loss": 0.6318, "step": 4352 }, { "epoch": 0.55, "grad_norm": 0.7274607856506495, "learning_rate": 8.480219541086125e-06, "loss": 0.619, "step": 4353 }, { "epoch": 0.55, "grad_norm": 0.8166137352707963, "learning_rate": 8.479478788925547e-06, "loss": 0.6274, "step": 4354 }, { "epoch": 0.55, "grad_norm": 0.7180740681173754, "learning_rate": 8.478737888654676e-06, "loss": 0.5898, "step": 4355 }, { "epoch": 0.55, "grad_norm": 0.5609736243588401, "learning_rate": 8.477996840305049e-06, "loss": 0.4804, "step": 4356 }, { "epoch": 0.56, "grad_norm": 0.5991913989693161, "learning_rate": 8.47725564390821e-06, "loss": 0.5141, "step": 4357 }, { "epoch": 0.56, "grad_norm": 0.894912278276884, "learning_rate": 8.476514299495713e-06, "loss": 0.6199, "step": 4358 }, { "epoch": 0.56, "grad_norm": 0.647204864992118, "learning_rate": 8.47577280709911e-06, "loss": 0.5633, "step": 4359 }, { "epoch": 0.56, "grad_norm": 0.7095036023821971, "learning_rate": 8.475031166749965e-06, "loss": 0.5271, "step": 4360 }, { "epoch": 0.56, "grad_norm": 0.6218753907478503, "learning_rate": 8.47428937847985e-06, "loss": 0.5431, "step": 4361 }, { "epoch": 0.56, "grad_norm": 0.6700308475198772, "learning_rate": 8.473547442320335e-06, "loss": 0.5286, "step": 4362 }, { "epoch": 0.56, "grad_norm": 0.7376301008276802, "learning_rate": 8.472805358303007e-06, "loss": 0.6417, "step": 4363 }, { "epoch": 0.56, "grad_norm": 0.7932546849779789, "learning_rate": 8.472063126459453e-06, "loss": 0.5774, "step": 4364 }, { "epoch": 0.56, "grad_norm": 0.7437735077644912, "learning_rate": 8.471320746821265e-06, "loss": 0.5766, "step": 4365 }, { "epoch": 0.56, "grad_norm": 0.8088037529803853, "learning_rate": 8.470578219420048e-06, "loss": 0.6105, "step": 4366 }, { "epoch": 0.56, "grad_norm": 1.0850284063236828, "learning_rate": 8.469835544287404e-06, "loss": 0.6306, "step": 4367 }, { "epoch": 0.56, "grad_norm": 0.6899848915368455, "learning_rate": 8.46909272145495e-06, "loss": 0.6038, "step": 4368 }, { "epoch": 0.56, "grad_norm": 0.6172932585039929, "learning_rate": 8.468349750954306e-06, "loss": 0.5382, "step": 4369 }, { "epoch": 0.56, "grad_norm": 0.8093365987408733, "learning_rate": 8.467606632817093e-06, "loss": 0.661, "step": 4370 }, { "epoch": 0.56, "grad_norm": 0.8171873486748183, "learning_rate": 8.466863367074949e-06, "loss": 0.6363, "step": 4371 }, { "epoch": 0.56, "grad_norm": 0.7271344873956909, "learning_rate": 8.46611995375951e-06, "loss": 0.4952, "step": 4372 }, { "epoch": 0.56, "grad_norm": 0.615570178408858, "learning_rate": 8.46537639290242e-06, "loss": 0.5651, "step": 4373 }, { "epoch": 0.56, "grad_norm": 0.6726183851577787, "learning_rate": 8.46463268453533e-06, "loss": 0.5349, "step": 4374 }, { "epoch": 0.56, "grad_norm": 0.7369120502687199, "learning_rate": 8.4638888286899e-06, "loss": 0.6024, "step": 4375 }, { "epoch": 0.56, "grad_norm": 1.0593756232299247, "learning_rate": 8.463144825397792e-06, "loss": 0.611, "step": 4376 }, { "epoch": 0.56, "grad_norm": 0.6251734736496313, "learning_rate": 8.462400674690673e-06, "loss": 0.5461, "step": 4377 }, { "epoch": 0.56, "grad_norm": 0.7709423700590702, "learning_rate": 8.461656376600222e-06, "loss": 0.665, "step": 4378 }, { "epoch": 0.56, "grad_norm": 0.7453921369022694, "learning_rate": 8.460911931158122e-06, "loss": 0.5916, "step": 4379 }, { "epoch": 0.56, "grad_norm": 0.785242195667744, "learning_rate": 8.46016733839606e-06, "loss": 0.5633, "step": 4380 }, { "epoch": 0.56, "grad_norm": 0.8122768105789598, "learning_rate": 8.45942259834573e-06, "loss": 0.6906, "step": 4381 }, { "epoch": 0.56, "grad_norm": 0.9553098162617438, "learning_rate": 8.458677711038836e-06, "loss": 0.6909, "step": 4382 }, { "epoch": 0.56, "grad_norm": 0.7384939551402149, "learning_rate": 8.457932676507085e-06, "loss": 0.5796, "step": 4383 }, { "epoch": 0.56, "grad_norm": 0.8338925407618774, "learning_rate": 8.457187494782189e-06, "loss": 0.6332, "step": 4384 }, { "epoch": 0.56, "grad_norm": 0.6910187485770999, "learning_rate": 8.456442165895869e-06, "loss": 0.6281, "step": 4385 }, { "epoch": 0.56, "grad_norm": 0.5985086222563284, "learning_rate": 8.455696689879852e-06, "loss": 0.5164, "step": 4386 }, { "epoch": 0.56, "grad_norm": 0.7031678249461256, "learning_rate": 8.454951066765868e-06, "loss": 0.5555, "step": 4387 }, { "epoch": 0.56, "grad_norm": 0.8621667979416765, "learning_rate": 8.45420529658566e-06, "loss": 0.6076, "step": 4388 }, { "epoch": 0.56, "grad_norm": 0.9141387606178659, "learning_rate": 8.453459379370969e-06, "loss": 0.6112, "step": 4389 }, { "epoch": 0.56, "grad_norm": 0.6266554619922422, "learning_rate": 8.45271331515355e-06, "loss": 0.527, "step": 4390 }, { "epoch": 0.56, "grad_norm": 0.8093966545388899, "learning_rate": 8.451967103965155e-06, "loss": 0.65, "step": 4391 }, { "epoch": 0.56, "grad_norm": 0.9465313148657902, "learning_rate": 8.451220745837553e-06, "loss": 0.6203, "step": 4392 }, { "epoch": 0.56, "grad_norm": 0.8207052142712354, "learning_rate": 8.45047424080251e-06, "loss": 0.6107, "step": 4393 }, { "epoch": 0.56, "grad_norm": 0.6106219759665608, "learning_rate": 8.44972758889181e-06, "loss": 0.5803, "step": 4394 }, { "epoch": 0.56, "grad_norm": 0.7100279341225781, "learning_rate": 8.448980790137226e-06, "loss": 0.5977, "step": 4395 }, { "epoch": 0.56, "grad_norm": 0.625937153542458, "learning_rate": 8.448233844570553e-06, "loss": 0.5184, "step": 4396 }, { "epoch": 0.56, "grad_norm": 0.7864800476436626, "learning_rate": 8.447486752223584e-06, "loss": 0.5955, "step": 4397 }, { "epoch": 0.56, "grad_norm": 0.7887189384852409, "learning_rate": 8.44673951312812e-06, "loss": 0.6166, "step": 4398 }, { "epoch": 0.56, "grad_norm": 0.7566261208448904, "learning_rate": 8.445992127315969e-06, "loss": 0.5728, "step": 4399 }, { "epoch": 0.56, "grad_norm": 0.5423403336054106, "learning_rate": 8.445244594818945e-06, "loss": 0.4905, "step": 4400 }, { "epoch": 0.56, "grad_norm": 0.621849865909122, "learning_rate": 8.44449691566887e-06, "loss": 0.5985, "step": 4401 }, { "epoch": 0.56, "grad_norm": 0.7771931513536706, "learning_rate": 8.443749089897564e-06, "loss": 0.598, "step": 4402 }, { "epoch": 0.56, "grad_norm": 0.8557942555061883, "learning_rate": 8.443001117536868e-06, "loss": 0.6443, "step": 4403 }, { "epoch": 0.56, "grad_norm": 0.6672955356737872, "learning_rate": 8.442252998618615e-06, "loss": 0.5755, "step": 4404 }, { "epoch": 0.56, "grad_norm": 0.580889998246896, "learning_rate": 8.441504733174653e-06, "loss": 0.5515, "step": 4405 }, { "epoch": 0.56, "grad_norm": 0.7432901191602979, "learning_rate": 8.440756321236829e-06, "loss": 0.591, "step": 4406 }, { "epoch": 0.56, "grad_norm": 0.9234935615780759, "learning_rate": 8.440007762837006e-06, "loss": 0.6407, "step": 4407 }, { "epoch": 0.56, "grad_norm": 0.649441462722885, "learning_rate": 8.439259058007046e-06, "loss": 0.5522, "step": 4408 }, { "epoch": 0.56, "grad_norm": 0.5782242670876485, "learning_rate": 8.438510206778816e-06, "loss": 0.5429, "step": 4409 }, { "epoch": 0.56, "grad_norm": 0.623231150960479, "learning_rate": 8.437761209184196e-06, "loss": 0.5516, "step": 4410 }, { "epoch": 0.56, "grad_norm": 0.8951910480939944, "learning_rate": 8.437012065255066e-06, "loss": 0.6411, "step": 4411 }, { "epoch": 0.56, "grad_norm": 0.5606392635611586, "learning_rate": 8.436262775023314e-06, "loss": 0.513, "step": 4412 }, { "epoch": 0.56, "grad_norm": 0.533246558256732, "learning_rate": 8.435513338520839e-06, "loss": 0.54, "step": 4413 }, { "epoch": 0.56, "grad_norm": 0.6118557654265043, "learning_rate": 8.434763755779538e-06, "loss": 0.5256, "step": 4414 }, { "epoch": 0.56, "grad_norm": 1.0038463817566687, "learning_rate": 8.434014026831321e-06, "loss": 0.5858, "step": 4415 }, { "epoch": 0.56, "grad_norm": 0.5914753578370091, "learning_rate": 8.4332641517081e-06, "loss": 0.4763, "step": 4416 }, { "epoch": 0.56, "grad_norm": 0.7647039799522053, "learning_rate": 8.432514130441794e-06, "loss": 0.6039, "step": 4417 }, { "epoch": 0.56, "grad_norm": 0.7876606790890524, "learning_rate": 8.431763963064331e-06, "loss": 0.667, "step": 4418 }, { "epoch": 0.56, "grad_norm": 0.6997141913370188, "learning_rate": 8.431013649607643e-06, "loss": 0.6125, "step": 4419 }, { "epoch": 0.56, "grad_norm": 0.7125075326371043, "learning_rate": 8.430263190103667e-06, "loss": 0.5906, "step": 4420 }, { "epoch": 0.56, "grad_norm": 0.9033565393958323, "learning_rate": 8.42951258458435e-06, "loss": 0.5742, "step": 4421 }, { "epoch": 0.56, "grad_norm": 0.618547274382211, "learning_rate": 8.42876183308164e-06, "loss": 0.5284, "step": 4422 }, { "epoch": 0.56, "grad_norm": 0.641059564173985, "learning_rate": 8.428010935627496e-06, "loss": 0.551, "step": 4423 }, { "epoch": 0.56, "grad_norm": 0.6386043167072742, "learning_rate": 8.42725989225388e-06, "loss": 0.5118, "step": 4424 }, { "epoch": 0.56, "grad_norm": 0.6912236302258694, "learning_rate": 8.426508702992763e-06, "loss": 0.5888, "step": 4425 }, { "epoch": 0.56, "grad_norm": 0.8647033230811324, "learning_rate": 8.425757367876122e-06, "loss": 0.6452, "step": 4426 }, { "epoch": 0.56, "grad_norm": 0.608122830630269, "learning_rate": 8.425005886935936e-06, "loss": 0.5214, "step": 4427 }, { "epoch": 0.56, "grad_norm": 0.5668157024818089, "learning_rate": 8.424254260204192e-06, "loss": 0.5299, "step": 4428 }, { "epoch": 0.56, "grad_norm": 0.7158474583360988, "learning_rate": 8.423502487712888e-06, "loss": 0.5725, "step": 4429 }, { "epoch": 0.56, "grad_norm": 0.7657430339608319, "learning_rate": 8.422750569494023e-06, "loss": 0.5384, "step": 4430 }, { "epoch": 0.56, "grad_norm": 0.7803553538455631, "learning_rate": 8.421998505579603e-06, "loss": 0.6445, "step": 4431 }, { "epoch": 0.56, "grad_norm": 0.6182084913311109, "learning_rate": 8.421246296001644e-06, "loss": 0.5148, "step": 4432 }, { "epoch": 0.56, "grad_norm": 0.8661938537133296, "learning_rate": 8.42049394079216e-06, "loss": 0.5838, "step": 4433 }, { "epoch": 0.56, "grad_norm": 0.5759663788211165, "learning_rate": 8.419741439983181e-06, "loss": 0.47, "step": 4434 }, { "epoch": 0.57, "grad_norm": 0.7058287456561901, "learning_rate": 8.418988793606738e-06, "loss": 0.6008, "step": 4435 }, { "epoch": 0.57, "grad_norm": 0.7666808515140944, "learning_rate": 8.418236001694864e-06, "loss": 0.61, "step": 4436 }, { "epoch": 0.57, "grad_norm": 0.5853176142649072, "learning_rate": 8.41748306427961e-06, "loss": 0.5359, "step": 4437 }, { "epoch": 0.57, "grad_norm": 0.5862092657965132, "learning_rate": 8.416729981393021e-06, "loss": 0.456, "step": 4438 }, { "epoch": 0.57, "grad_norm": 0.5724659081903708, "learning_rate": 8.415976753067154e-06, "loss": 0.5349, "step": 4439 }, { "epoch": 0.57, "grad_norm": 0.6833715600826622, "learning_rate": 8.415223379334075e-06, "loss": 0.5165, "step": 4440 }, { "epoch": 0.57, "grad_norm": 0.7931207098380295, "learning_rate": 8.414469860225849e-06, "loss": 0.6231, "step": 4441 }, { "epoch": 0.57, "grad_norm": 0.6813979269344471, "learning_rate": 8.413716195774553e-06, "loss": 0.533, "step": 4442 }, { "epoch": 0.57, "grad_norm": 0.8431388638660972, "learning_rate": 8.412962386012264e-06, "loss": 0.652, "step": 4443 }, { "epoch": 0.57, "grad_norm": 0.7565321624882378, "learning_rate": 8.412208430971076e-06, "loss": 0.5863, "step": 4444 }, { "epoch": 0.57, "grad_norm": 0.7096067574235724, "learning_rate": 8.411454330683077e-06, "loss": 0.5656, "step": 4445 }, { "epoch": 0.57, "grad_norm": 0.6383764971101797, "learning_rate": 8.41070008518037e-06, "loss": 0.5137, "step": 4446 }, { "epoch": 0.57, "grad_norm": 0.7497706062021322, "learning_rate": 8.40994569449506e-06, "loss": 0.6202, "step": 4447 }, { "epoch": 0.57, "grad_norm": 0.6312542897477779, "learning_rate": 8.409191158659259e-06, "loss": 0.5386, "step": 4448 }, { "epoch": 0.57, "grad_norm": 0.653863516156257, "learning_rate": 8.40843647770508e-06, "loss": 0.5385, "step": 4449 }, { "epoch": 0.57, "grad_norm": 0.6156252252004295, "learning_rate": 8.407681651664654e-06, "loss": 0.5179, "step": 4450 }, { "epoch": 0.57, "grad_norm": 0.7142752209597169, "learning_rate": 8.406926680570111e-06, "loss": 0.5873, "step": 4451 }, { "epoch": 0.57, "grad_norm": 1.0733664514333419, "learning_rate": 8.406171564453588e-06, "loss": 0.6702, "step": 4452 }, { "epoch": 0.57, "grad_norm": 0.5710862247044152, "learning_rate": 8.405416303347223e-06, "loss": 0.5509, "step": 4453 }, { "epoch": 0.57, "grad_norm": 0.7607990110698681, "learning_rate": 8.404660897283168e-06, "loss": 0.5989, "step": 4454 }, { "epoch": 0.57, "grad_norm": 0.8445745815776041, "learning_rate": 8.403905346293579e-06, "loss": 0.6713, "step": 4455 }, { "epoch": 0.57, "grad_norm": 0.7937089993604419, "learning_rate": 8.403149650410617e-06, "loss": 0.6041, "step": 4456 }, { "epoch": 0.57, "grad_norm": 0.7770958593130254, "learning_rate": 8.402393809666448e-06, "loss": 0.6032, "step": 4457 }, { "epoch": 0.57, "grad_norm": 0.7400065671466064, "learning_rate": 8.40163782409325e-06, "loss": 0.6354, "step": 4458 }, { "epoch": 0.57, "grad_norm": 0.7432366006126222, "learning_rate": 8.400881693723197e-06, "loss": 0.6646, "step": 4459 }, { "epoch": 0.57, "grad_norm": 0.7735783971083421, "learning_rate": 8.400125418588478e-06, "loss": 0.5998, "step": 4460 }, { "epoch": 0.57, "grad_norm": 0.7964233894410391, "learning_rate": 8.399368998721287e-06, "loss": 0.6373, "step": 4461 }, { "epoch": 0.57, "grad_norm": 0.6999861157095514, "learning_rate": 8.398612434153819e-06, "loss": 0.6027, "step": 4462 }, { "epoch": 0.57, "grad_norm": 0.6412778795231933, "learning_rate": 8.39785572491828e-06, "loss": 0.5091, "step": 4463 }, { "epoch": 0.57, "grad_norm": 0.7456919161309303, "learning_rate": 8.397098871046883e-06, "loss": 0.6147, "step": 4464 }, { "epoch": 0.57, "grad_norm": 0.5393626049617862, "learning_rate": 8.396341872571841e-06, "loss": 0.5005, "step": 4465 }, { "epoch": 0.57, "grad_norm": 0.8533845519661352, "learning_rate": 8.395584729525379e-06, "loss": 0.5932, "step": 4466 }, { "epoch": 0.57, "grad_norm": 0.6321572046611634, "learning_rate": 8.394827441939725e-06, "loss": 0.5685, "step": 4467 }, { "epoch": 0.57, "grad_norm": 0.6953148304892535, "learning_rate": 8.394070009847115e-06, "loss": 0.6013, "step": 4468 }, { "epoch": 0.57, "grad_norm": 0.9975105974700247, "learning_rate": 8.393312433279791e-06, "loss": 0.6189, "step": 4469 }, { "epoch": 0.57, "grad_norm": 0.6276522101524512, "learning_rate": 8.392554712270001e-06, "loss": 0.5636, "step": 4470 }, { "epoch": 0.57, "grad_norm": 0.7611873920784217, "learning_rate": 8.391796846849997e-06, "loss": 0.6058, "step": 4471 }, { "epoch": 0.57, "grad_norm": 0.6066135351030368, "learning_rate": 8.391038837052042e-06, "loss": 0.5753, "step": 4472 }, { "epoch": 0.57, "grad_norm": 0.5548566589434175, "learning_rate": 8.390280682908398e-06, "loss": 0.5037, "step": 4473 }, { "epoch": 0.57, "grad_norm": 0.6370250625053674, "learning_rate": 8.389522384451338e-06, "loss": 0.4581, "step": 4474 }, { "epoch": 0.57, "grad_norm": 1.0508030236374957, "learning_rate": 8.388763941713142e-06, "loss": 0.6039, "step": 4475 }, { "epoch": 0.57, "grad_norm": 0.5595806131880281, "learning_rate": 8.388005354726096e-06, "loss": 0.4803, "step": 4476 }, { "epoch": 0.57, "grad_norm": 0.833762981885313, "learning_rate": 8.387246623522487e-06, "loss": 0.6109, "step": 4477 }, { "epoch": 0.57, "grad_norm": 0.7650249292107758, "learning_rate": 8.386487748134615e-06, "loss": 0.5198, "step": 4478 }, { "epoch": 0.57, "grad_norm": 1.2155262611753492, "learning_rate": 8.38572872859478e-06, "loss": 0.6279, "step": 4479 }, { "epoch": 0.57, "grad_norm": 0.6689505057411278, "learning_rate": 8.38496956493529e-06, "loss": 0.5565, "step": 4480 }, { "epoch": 0.57, "grad_norm": 0.8908804902154505, "learning_rate": 8.384210257188465e-06, "loss": 0.6179, "step": 4481 }, { "epoch": 0.57, "grad_norm": 0.8230118279260698, "learning_rate": 8.383450805386624e-06, "loss": 0.6429, "step": 4482 }, { "epoch": 0.57, "grad_norm": 0.576327363748139, "learning_rate": 8.382691209562093e-06, "loss": 0.4824, "step": 4483 }, { "epoch": 0.57, "grad_norm": 0.608708951314828, "learning_rate": 8.381931469747208e-06, "loss": 0.539, "step": 4484 }, { "epoch": 0.57, "grad_norm": 0.8149030142574907, "learning_rate": 8.381171585974306e-06, "loss": 0.569, "step": 4485 }, { "epoch": 0.57, "grad_norm": 0.8041259526101094, "learning_rate": 8.380411558275735e-06, "loss": 0.6296, "step": 4486 }, { "epoch": 0.57, "grad_norm": 0.7740474608758181, "learning_rate": 8.379651386683845e-06, "loss": 0.5857, "step": 4487 }, { "epoch": 0.57, "grad_norm": 0.6021501709260586, "learning_rate": 8.378891071230998e-06, "loss": 0.4878, "step": 4488 }, { "epoch": 0.57, "grad_norm": 0.5518869268865115, "learning_rate": 8.378130611949553e-06, "loss": 0.5544, "step": 4489 }, { "epoch": 0.57, "grad_norm": 0.8623381337388066, "learning_rate": 8.377370008871884e-06, "loss": 0.6404, "step": 4490 }, { "epoch": 0.57, "grad_norm": 0.6187203560978535, "learning_rate": 8.376609262030366e-06, "loss": 0.5227, "step": 4491 }, { "epoch": 0.57, "grad_norm": 0.6501240335984548, "learning_rate": 8.375848371457383e-06, "loss": 0.564, "step": 4492 }, { "epoch": 0.57, "grad_norm": 0.8229738642659958, "learning_rate": 8.37508733718532e-06, "loss": 0.6042, "step": 4493 }, { "epoch": 0.57, "grad_norm": 0.7727942980664513, "learning_rate": 8.374326159246575e-06, "loss": 0.6231, "step": 4494 }, { "epoch": 0.57, "grad_norm": 0.7508246557879681, "learning_rate": 8.373564837673549e-06, "loss": 0.5802, "step": 4495 }, { "epoch": 0.57, "grad_norm": 0.5813973478855676, "learning_rate": 8.372803372498648e-06, "loss": 0.4964, "step": 4496 }, { "epoch": 0.57, "grad_norm": 0.7093755340221902, "learning_rate": 8.372041763754287e-06, "loss": 0.5232, "step": 4497 }, { "epoch": 0.57, "grad_norm": 0.7251925775924358, "learning_rate": 8.371280011472881e-06, "loss": 0.5971, "step": 4498 }, { "epoch": 0.57, "grad_norm": 0.5501750394360069, "learning_rate": 8.370518115686858e-06, "loss": 0.5746, "step": 4499 }, { "epoch": 0.57, "grad_norm": 0.6571418849244468, "learning_rate": 8.369756076428652e-06, "loss": 0.5331, "step": 4500 }, { "epoch": 0.57, "grad_norm": 0.5686787720364862, "learning_rate": 8.368993893730697e-06, "loss": 0.5394, "step": 4501 }, { "epoch": 0.57, "grad_norm": 0.5638748476950008, "learning_rate": 8.368231567625437e-06, "loss": 0.5315, "step": 4502 }, { "epoch": 0.57, "grad_norm": 0.7717308841066829, "learning_rate": 8.367469098145322e-06, "loss": 0.6847, "step": 4503 }, { "epoch": 0.57, "grad_norm": 0.8644609082076836, "learning_rate": 8.366706485322808e-06, "loss": 0.5843, "step": 4504 }, { "epoch": 0.57, "grad_norm": 0.6412821334544353, "learning_rate": 8.36594372919036e-06, "loss": 0.5142, "step": 4505 }, { "epoch": 0.57, "grad_norm": 0.5799824683265785, "learning_rate": 8.365180829780442e-06, "loss": 0.5174, "step": 4506 }, { "epoch": 0.57, "grad_norm": 0.568866374908323, "learning_rate": 8.36441778712553e-06, "loss": 0.5253, "step": 4507 }, { "epoch": 0.57, "grad_norm": 0.7030245093748975, "learning_rate": 8.363654601258103e-06, "loss": 0.5826, "step": 4508 }, { "epoch": 0.57, "grad_norm": 0.854340025941681, "learning_rate": 8.362891272210647e-06, "loss": 0.6624, "step": 4509 }, { "epoch": 0.57, "grad_norm": 0.7255927073232108, "learning_rate": 8.362127800015659e-06, "loss": 0.5874, "step": 4510 }, { "epoch": 0.57, "grad_norm": 0.5250171534714101, "learning_rate": 8.361364184705633e-06, "loss": 0.483, "step": 4511 }, { "epoch": 0.57, "grad_norm": 0.7513306481367137, "learning_rate": 8.360600426313074e-06, "loss": 0.5445, "step": 4512 }, { "epoch": 0.57, "grad_norm": 0.6615244418505919, "learning_rate": 8.359836524870496e-06, "loss": 0.5322, "step": 4513 }, { "epoch": 0.58, "grad_norm": 0.7702991030071065, "learning_rate": 8.359072480410412e-06, "loss": 0.6721, "step": 4514 }, { "epoch": 0.58, "grad_norm": 0.5494868261672948, "learning_rate": 8.358308292965347e-06, "loss": 0.5264, "step": 4515 }, { "epoch": 0.58, "grad_norm": 0.6066363350537363, "learning_rate": 8.35754396256783e-06, "loss": 0.5767, "step": 4516 }, { "epoch": 0.58, "grad_norm": 0.7460154252299692, "learning_rate": 8.356779489250395e-06, "loss": 0.566, "step": 4517 }, { "epoch": 0.58, "grad_norm": 0.647224113721489, "learning_rate": 8.356014873045584e-06, "loss": 0.5135, "step": 4518 }, { "epoch": 0.58, "grad_norm": 0.7487485443601356, "learning_rate": 8.355250113985945e-06, "loss": 0.6398, "step": 4519 }, { "epoch": 0.58, "grad_norm": 0.7762880385238963, "learning_rate": 8.35448521210403e-06, "loss": 0.6106, "step": 4520 }, { "epoch": 0.58, "grad_norm": 0.7100629932358956, "learning_rate": 8.353720167432397e-06, "loss": 0.6517, "step": 4521 }, { "epoch": 0.58, "grad_norm": 0.6947478186112446, "learning_rate": 8.352954980003615e-06, "loss": 0.528, "step": 4522 }, { "epoch": 0.58, "grad_norm": 0.7519450452872166, "learning_rate": 8.352189649850255e-06, "loss": 0.5414, "step": 4523 }, { "epoch": 0.58, "grad_norm": 0.7199672055423483, "learning_rate": 8.351424177004893e-06, "loss": 0.5983, "step": 4524 }, { "epoch": 0.58, "grad_norm": 0.6369900256999003, "learning_rate": 8.350658561500114e-06, "loss": 0.4919, "step": 4525 }, { "epoch": 0.58, "grad_norm": 0.6968041918701267, "learning_rate": 8.349892803368506e-06, "loss": 0.6362, "step": 4526 }, { "epoch": 0.58, "grad_norm": 0.6900397909829166, "learning_rate": 8.349126902642667e-06, "loss": 0.5738, "step": 4527 }, { "epoch": 0.58, "grad_norm": 0.7267647190814117, "learning_rate": 8.348360859355198e-06, "loss": 0.6043, "step": 4528 }, { "epoch": 0.58, "grad_norm": 0.779604519713317, "learning_rate": 8.347594673538705e-06, "loss": 0.6071, "step": 4529 }, { "epoch": 0.58, "grad_norm": 0.743336975063652, "learning_rate": 8.346828345225806e-06, "loss": 0.6383, "step": 4530 }, { "epoch": 0.58, "grad_norm": 0.544228793325572, "learning_rate": 8.34606187444912e-06, "loss": 0.4821, "step": 4531 }, { "epoch": 0.58, "grad_norm": 0.7831383892292381, "learning_rate": 8.34529526124127e-06, "loss": 0.6049, "step": 4532 }, { "epoch": 0.58, "grad_norm": 0.7421778872396227, "learning_rate": 8.344528505634894e-06, "loss": 0.6024, "step": 4533 }, { "epoch": 0.58, "grad_norm": 0.8358407921835102, "learning_rate": 8.343761607662625e-06, "loss": 0.6336, "step": 4534 }, { "epoch": 0.58, "grad_norm": 0.7708064486421488, "learning_rate": 8.34299456735711e-06, "loss": 0.622, "step": 4535 }, { "epoch": 0.58, "grad_norm": 0.6063444648625697, "learning_rate": 8.342227384750998e-06, "loss": 0.5267, "step": 4536 }, { "epoch": 0.58, "grad_norm": 0.5994602788328899, "learning_rate": 8.341460059876947e-06, "loss": 0.5056, "step": 4537 }, { "epoch": 0.58, "grad_norm": 0.563371386041275, "learning_rate": 8.34069259276762e-06, "loss": 0.4971, "step": 4538 }, { "epoch": 0.58, "grad_norm": 0.6961521146768033, "learning_rate": 8.339924983455684e-06, "loss": 0.5955, "step": 4539 }, { "epoch": 0.58, "grad_norm": 0.7742789902056458, "learning_rate": 8.339157231973815e-06, "loss": 0.5895, "step": 4540 }, { "epoch": 0.58, "grad_norm": 0.6445128213410857, "learning_rate": 8.338389338354693e-06, "loss": 0.5341, "step": 4541 }, { "epoch": 0.58, "grad_norm": 0.6121826947430126, "learning_rate": 8.337621302631003e-06, "loss": 0.5428, "step": 4542 }, { "epoch": 0.58, "grad_norm": 0.7345697368035174, "learning_rate": 8.33685312483544e-06, "loss": 0.5354, "step": 4543 }, { "epoch": 0.58, "grad_norm": 0.8811001564261831, "learning_rate": 8.336084805000703e-06, "loss": 0.6578, "step": 4544 }, { "epoch": 0.58, "grad_norm": 0.7278108537264646, "learning_rate": 8.335316343159498e-06, "loss": 0.57, "step": 4545 }, { "epoch": 0.58, "grad_norm": 0.6591807933465704, "learning_rate": 8.334547739344534e-06, "loss": 0.5237, "step": 4546 }, { "epoch": 0.58, "grad_norm": 0.6241909819167553, "learning_rate": 8.333778993588529e-06, "loss": 0.5383, "step": 4547 }, { "epoch": 0.58, "grad_norm": 0.5406844452349486, "learning_rate": 8.333010105924201e-06, "loss": 0.4765, "step": 4548 }, { "epoch": 0.58, "grad_norm": 0.5974736064406457, "learning_rate": 8.33224107638429e-06, "loss": 0.528, "step": 4549 }, { "epoch": 0.58, "grad_norm": 0.6932640265580899, "learning_rate": 8.331471905001521e-06, "loss": 0.5598, "step": 4550 }, { "epoch": 0.58, "grad_norm": 0.6459708961663617, "learning_rate": 8.330702591808639e-06, "loss": 0.4623, "step": 4551 }, { "epoch": 0.58, "grad_norm": 0.6357239836619667, "learning_rate": 8.329933136838391e-06, "loss": 0.5339, "step": 4552 }, { "epoch": 0.58, "grad_norm": 0.8551503830518864, "learning_rate": 8.32916354012353e-06, "loss": 0.6803, "step": 4553 }, { "epoch": 0.58, "grad_norm": 0.5869725208040968, "learning_rate": 8.328393801696817e-06, "loss": 0.4797, "step": 4554 }, { "epoch": 0.58, "grad_norm": 0.6419183139505903, "learning_rate": 8.327623921591014e-06, "loss": 0.5271, "step": 4555 }, { "epoch": 0.58, "grad_norm": 0.8884931704165502, "learning_rate": 8.326853899838895e-06, "loss": 0.6423, "step": 4556 }, { "epoch": 0.58, "grad_norm": 0.7618765452436912, "learning_rate": 8.326083736473238e-06, "loss": 0.6033, "step": 4557 }, { "epoch": 0.58, "grad_norm": 0.821669465018436, "learning_rate": 8.325313431526824e-06, "loss": 0.6335, "step": 4558 }, { "epoch": 0.58, "grad_norm": 0.5591507822069133, "learning_rate": 8.324542985032444e-06, "loss": 0.5383, "step": 4559 }, { "epoch": 0.58, "grad_norm": 0.5881586167867418, "learning_rate": 8.32377239702289e-06, "loss": 0.544, "step": 4560 }, { "epoch": 0.58, "grad_norm": 0.9188463807647256, "learning_rate": 8.32300166753097e-06, "loss": 0.5166, "step": 4561 }, { "epoch": 0.58, "grad_norm": 0.8610061131662988, "learning_rate": 8.322230796589486e-06, "loss": 0.595, "step": 4562 }, { "epoch": 0.58, "grad_norm": 0.9617603272577288, "learning_rate": 8.321459784231254e-06, "loss": 0.6348, "step": 4563 }, { "epoch": 0.58, "grad_norm": 0.7006695571472142, "learning_rate": 8.320688630489093e-06, "loss": 0.5687, "step": 4564 }, { "epoch": 0.58, "grad_norm": 0.7845886099944425, "learning_rate": 8.319917335395827e-06, "loss": 0.5882, "step": 4565 }, { "epoch": 0.58, "grad_norm": 0.7221957013093857, "learning_rate": 8.319145898984291e-06, "loss": 0.5141, "step": 4566 }, { "epoch": 0.58, "grad_norm": 1.001814019951614, "learning_rate": 8.318374321287319e-06, "loss": 0.6619, "step": 4567 }, { "epoch": 0.58, "grad_norm": 0.5920898880186863, "learning_rate": 8.317602602337755e-06, "loss": 0.5261, "step": 4568 }, { "epoch": 0.58, "grad_norm": 0.8901935217083875, "learning_rate": 8.316830742168452e-06, "loss": 0.69, "step": 4569 }, { "epoch": 0.58, "grad_norm": 0.737862041467974, "learning_rate": 8.316058740812263e-06, "loss": 0.5711, "step": 4570 }, { "epoch": 0.58, "grad_norm": 0.6741381417129442, "learning_rate": 8.31528659830205e-06, "loss": 0.5838, "step": 4571 }, { "epoch": 0.58, "grad_norm": 0.6252054561230648, "learning_rate": 8.314514314670681e-06, "loss": 0.5187, "step": 4572 }, { "epoch": 0.58, "grad_norm": 0.6362798972986528, "learning_rate": 8.313741889951028e-06, "loss": 0.5418, "step": 4573 }, { "epoch": 0.58, "grad_norm": 0.7232528490922306, "learning_rate": 8.312969324175972e-06, "loss": 0.5919, "step": 4574 }, { "epoch": 0.58, "grad_norm": 0.8233850690240231, "learning_rate": 8.312196617378399e-06, "loss": 0.6409, "step": 4575 }, { "epoch": 0.58, "grad_norm": 0.6412296015838452, "learning_rate": 8.311423769591201e-06, "loss": 0.5617, "step": 4576 }, { "epoch": 0.58, "grad_norm": 0.8807926764936764, "learning_rate": 8.310650780847275e-06, "loss": 0.6779, "step": 4577 }, { "epoch": 0.58, "grad_norm": 0.6088078919307018, "learning_rate": 8.309877651179523e-06, "loss": 0.5354, "step": 4578 }, { "epoch": 0.58, "grad_norm": 0.7457311656456699, "learning_rate": 8.309104380620857e-06, "loss": 0.5341, "step": 4579 }, { "epoch": 0.58, "grad_norm": 1.5331643572616342, "learning_rate": 8.308330969204192e-06, "loss": 0.6517, "step": 4580 }, { "epoch": 0.58, "grad_norm": 0.6157849670100451, "learning_rate": 8.30755741696245e-06, "loss": 0.5307, "step": 4581 }, { "epoch": 0.58, "grad_norm": 0.8538888377847706, "learning_rate": 8.306783723928558e-06, "loss": 0.6179, "step": 4582 }, { "epoch": 0.58, "grad_norm": 0.5696347142685619, "learning_rate": 8.30600989013545e-06, "loss": 0.5387, "step": 4583 }, { "epoch": 0.58, "grad_norm": 0.7679757721676566, "learning_rate": 8.305235915616065e-06, "loss": 0.6794, "step": 4584 }, { "epoch": 0.58, "grad_norm": 0.6822017456418559, "learning_rate": 8.30446180040335e-06, "loss": 0.5187, "step": 4585 }, { "epoch": 0.58, "grad_norm": 0.6171342767274044, "learning_rate": 8.303687544530254e-06, "loss": 0.5268, "step": 4586 }, { "epoch": 0.58, "grad_norm": 0.5780266415667538, "learning_rate": 8.302913148029738e-06, "loss": 0.528, "step": 4587 }, { "epoch": 0.58, "grad_norm": 0.6674256120796203, "learning_rate": 8.302138610934762e-06, "loss": 0.5698, "step": 4588 }, { "epoch": 0.58, "grad_norm": 0.8206296189598848, "learning_rate": 8.3013639332783e-06, "loss": 0.6536, "step": 4589 }, { "epoch": 0.58, "grad_norm": 0.7877189165111409, "learning_rate": 8.300589115093324e-06, "loss": 0.635, "step": 4590 }, { "epoch": 0.58, "grad_norm": 0.5616090193575192, "learning_rate": 8.299814156412816e-06, "loss": 0.5161, "step": 4591 }, { "epoch": 0.59, "grad_norm": 0.5988902365227176, "learning_rate": 8.299039057269764e-06, "loss": 0.5687, "step": 4592 }, { "epoch": 0.59, "grad_norm": 0.6960419334930371, "learning_rate": 8.298263817697162e-06, "loss": 0.5795, "step": 4593 }, { "epoch": 0.59, "grad_norm": 0.5707883144842965, "learning_rate": 8.297488437728008e-06, "loss": 0.5011, "step": 4594 }, { "epoch": 0.59, "grad_norm": 0.6575111612549288, "learning_rate": 8.296712917395311e-06, "loss": 0.5649, "step": 4595 }, { "epoch": 0.59, "grad_norm": 0.745300592831123, "learning_rate": 8.295937256732077e-06, "loss": 0.6651, "step": 4596 }, { "epoch": 0.59, "grad_norm": 0.6442942869675553, "learning_rate": 8.295161455771327e-06, "loss": 0.5579, "step": 4597 }, { "epoch": 0.59, "grad_norm": 0.6301425841386397, "learning_rate": 8.294385514546083e-06, "loss": 0.5346, "step": 4598 }, { "epoch": 0.59, "grad_norm": 0.5985101956372935, "learning_rate": 8.293609433089378e-06, "loss": 0.5695, "step": 4599 }, { "epoch": 0.59, "grad_norm": 0.5838525089912457, "learning_rate": 8.292833211434243e-06, "loss": 0.5491, "step": 4600 }, { "epoch": 0.59, "grad_norm": 0.608297559285062, "learning_rate": 8.29205684961372e-06, "loss": 0.5415, "step": 4601 }, { "epoch": 0.59, "grad_norm": 0.5850325101804145, "learning_rate": 8.291280347660856e-06, "loss": 0.53, "step": 4602 }, { "epoch": 0.59, "grad_norm": 0.6541675043155815, "learning_rate": 8.290503705608707e-06, "loss": 0.5075, "step": 4603 }, { "epoch": 0.59, "grad_norm": 0.732558482290716, "learning_rate": 8.28972692349033e-06, "loss": 0.6011, "step": 4604 }, { "epoch": 0.59, "grad_norm": 0.6349321427691408, "learning_rate": 8.288950001338788e-06, "loss": 0.5052, "step": 4605 }, { "epoch": 0.59, "grad_norm": 0.7066040299986205, "learning_rate": 8.288172939187155e-06, "loss": 0.5979, "step": 4606 }, { "epoch": 0.59, "grad_norm": 0.6263607674648934, "learning_rate": 8.287395737068509e-06, "loss": 0.5699, "step": 4607 }, { "epoch": 0.59, "grad_norm": 0.7576194191057484, "learning_rate": 8.286618395015931e-06, "loss": 0.5935, "step": 4608 }, { "epoch": 0.59, "grad_norm": 0.6930537273041669, "learning_rate": 8.28584091306251e-06, "loss": 0.5415, "step": 4609 }, { "epoch": 0.59, "grad_norm": 0.8121738648037318, "learning_rate": 8.285063291241342e-06, "loss": 0.6349, "step": 4610 }, { "epoch": 0.59, "grad_norm": 0.8005553728820987, "learning_rate": 8.284285529585526e-06, "loss": 0.6458, "step": 4611 }, { "epoch": 0.59, "grad_norm": 0.6713183728884552, "learning_rate": 8.283507628128172e-06, "loss": 0.5637, "step": 4612 }, { "epoch": 0.59, "grad_norm": 0.5326842064130279, "learning_rate": 8.282729586902389e-06, "loss": 0.5141, "step": 4613 }, { "epoch": 0.59, "grad_norm": 0.7089873735358927, "learning_rate": 8.281951405941298e-06, "loss": 0.5704, "step": 4614 }, { "epoch": 0.59, "grad_norm": 0.748539003985585, "learning_rate": 8.281173085278024e-06, "loss": 0.5747, "step": 4615 }, { "epoch": 0.59, "grad_norm": 0.768531314502041, "learning_rate": 8.280394624945697e-06, "loss": 0.5113, "step": 4616 }, { "epoch": 0.59, "grad_norm": 0.596118209725609, "learning_rate": 8.279616024977453e-06, "loss": 0.5245, "step": 4617 }, { "epoch": 0.59, "grad_norm": 0.8051177654560966, "learning_rate": 8.278837285406436e-06, "loss": 0.5446, "step": 4618 }, { "epoch": 0.59, "grad_norm": 0.5759237485926437, "learning_rate": 8.278058406265792e-06, "loss": 0.51, "step": 4619 }, { "epoch": 0.59, "grad_norm": 0.6625567295722475, "learning_rate": 8.277279387588677e-06, "loss": 0.6042, "step": 4620 }, { "epoch": 0.59, "grad_norm": 0.6024807647703426, "learning_rate": 8.276500229408251e-06, "loss": 0.5292, "step": 4621 }, { "epoch": 0.59, "grad_norm": 0.7874690897198057, "learning_rate": 8.27572093175768e-06, "loss": 0.6525, "step": 4622 }, { "epoch": 0.59, "grad_norm": 0.6261995981712538, "learning_rate": 8.27494149467014e-06, "loss": 0.5347, "step": 4623 }, { "epoch": 0.59, "grad_norm": 0.7515888419419297, "learning_rate": 8.274161918178803e-06, "loss": 0.6693, "step": 4624 }, { "epoch": 0.59, "grad_norm": 0.6524185402631865, "learning_rate": 8.273382202316855e-06, "loss": 0.5265, "step": 4625 }, { "epoch": 0.59, "grad_norm": 0.6747795621916263, "learning_rate": 8.272602347117488e-06, "loss": 0.5475, "step": 4626 }, { "epoch": 0.59, "grad_norm": 0.7020670020957112, "learning_rate": 8.271822352613896e-06, "loss": 0.6218, "step": 4627 }, { "epoch": 0.59, "grad_norm": 0.5411099911561422, "learning_rate": 8.271042218839284e-06, "loss": 0.5111, "step": 4628 }, { "epoch": 0.59, "grad_norm": 0.5493993465983869, "learning_rate": 8.270261945826855e-06, "loss": 0.5359, "step": 4629 }, { "epoch": 0.59, "grad_norm": 0.792438775915959, "learning_rate": 8.269481533609827e-06, "loss": 0.6317, "step": 4630 }, { "epoch": 0.59, "grad_norm": 0.8024391520749959, "learning_rate": 8.268700982221416e-06, "loss": 0.6572, "step": 4631 }, { "epoch": 0.59, "grad_norm": 0.9578000117038373, "learning_rate": 8.267920291694851e-06, "loss": 0.5122, "step": 4632 }, { "epoch": 0.59, "grad_norm": 0.7981641600501669, "learning_rate": 8.267139462063363e-06, "loss": 0.6319, "step": 4633 }, { "epoch": 0.59, "grad_norm": 0.8111421228066134, "learning_rate": 8.266358493360186e-06, "loss": 0.6145, "step": 4634 }, { "epoch": 0.59, "grad_norm": 0.5781219843472877, "learning_rate": 8.265577385618566e-06, "loss": 0.5159, "step": 4635 }, { "epoch": 0.59, "grad_norm": 0.6945359278874309, "learning_rate": 8.264796138871753e-06, "loss": 0.5012, "step": 4636 }, { "epoch": 0.59, "grad_norm": 0.6453162545353781, "learning_rate": 8.264014753153e-06, "loss": 0.5841, "step": 4637 }, { "epoch": 0.59, "grad_norm": 0.666539692458954, "learning_rate": 8.26323322849557e-06, "loss": 0.5674, "step": 4638 }, { "epoch": 0.59, "grad_norm": 0.5924871182551501, "learning_rate": 8.262451564932729e-06, "loss": 0.5025, "step": 4639 }, { "epoch": 0.59, "grad_norm": 0.6339368540742805, "learning_rate": 8.261669762497752e-06, "loss": 0.5289, "step": 4640 }, { "epoch": 0.59, "grad_norm": 0.7127930839254408, "learning_rate": 8.260887821223915e-06, "loss": 0.5792, "step": 4641 }, { "epoch": 0.59, "grad_norm": 0.7262930078915326, "learning_rate": 8.260105741144502e-06, "loss": 0.5962, "step": 4642 }, { "epoch": 0.59, "grad_norm": 0.5935238702267821, "learning_rate": 8.259323522292808e-06, "loss": 0.4399, "step": 4643 }, { "epoch": 0.59, "grad_norm": 0.6121339348837413, "learning_rate": 8.258541164702126e-06, "loss": 0.5426, "step": 4644 }, { "epoch": 0.59, "grad_norm": 0.824277049920599, "learning_rate": 8.257758668405758e-06, "loss": 0.6448, "step": 4645 }, { "epoch": 0.59, "grad_norm": 0.5560645771345775, "learning_rate": 8.256976033437015e-06, "loss": 0.5562, "step": 4646 }, { "epoch": 0.59, "grad_norm": 0.6901256974527458, "learning_rate": 8.25619325982921e-06, "loss": 0.5405, "step": 4647 }, { "epoch": 0.59, "grad_norm": 0.779128310073951, "learning_rate": 8.255410347615663e-06, "loss": 0.6679, "step": 4648 }, { "epoch": 0.59, "grad_norm": 0.6752717566763254, "learning_rate": 8.2546272968297e-06, "loss": 0.5654, "step": 4649 }, { "epoch": 0.59, "grad_norm": 0.8439658033365739, "learning_rate": 8.253844107504653e-06, "loss": 0.6299, "step": 4650 }, { "epoch": 0.59, "grad_norm": 0.8188284309297219, "learning_rate": 8.253060779673863e-06, "loss": 0.6453, "step": 4651 }, { "epoch": 0.59, "grad_norm": 0.7889139822860217, "learning_rate": 8.252277313370668e-06, "loss": 0.6143, "step": 4652 }, { "epoch": 0.59, "grad_norm": 0.6781967242411078, "learning_rate": 8.251493708628422e-06, "loss": 0.5723, "step": 4653 }, { "epoch": 0.59, "grad_norm": 1.1367495885264813, "learning_rate": 8.250709965480478e-06, "loss": 0.6713, "step": 4654 }, { "epoch": 0.59, "grad_norm": 0.5367243516615912, "learning_rate": 8.2499260839602e-06, "loss": 0.537, "step": 4655 }, { "epoch": 0.59, "grad_norm": 0.7737990284411801, "learning_rate": 8.249142064100952e-06, "loss": 0.5988, "step": 4656 }, { "epoch": 0.59, "grad_norm": 0.6965216708231662, "learning_rate": 8.248357905936111e-06, "loss": 0.566, "step": 4657 }, { "epoch": 0.59, "grad_norm": 0.7603000484519947, "learning_rate": 8.247573609499054e-06, "loss": 0.5566, "step": 4658 }, { "epoch": 0.59, "grad_norm": 0.8556547495436599, "learning_rate": 8.246789174823164e-06, "loss": 0.6139, "step": 4659 }, { "epoch": 0.59, "grad_norm": 0.8039671031347132, "learning_rate": 8.246004601941834e-06, "loss": 0.6099, "step": 4660 }, { "epoch": 0.59, "grad_norm": 0.7857399076760495, "learning_rate": 8.245219890888463e-06, "loss": 0.6252, "step": 4661 }, { "epoch": 0.59, "grad_norm": 0.6986999667506865, "learning_rate": 8.244435041696453e-06, "loss": 0.5901, "step": 4662 }, { "epoch": 0.59, "grad_norm": 0.6663399209358067, "learning_rate": 8.243650054399209e-06, "loss": 0.5309, "step": 4663 }, { "epoch": 0.59, "grad_norm": 0.6288798647661501, "learning_rate": 8.242864929030145e-06, "loss": 0.5573, "step": 4664 }, { "epoch": 0.59, "grad_norm": 0.7463076296848864, "learning_rate": 8.242079665622686e-06, "loss": 0.5377, "step": 4665 }, { "epoch": 0.59, "grad_norm": 0.8739340742472499, "learning_rate": 8.241294264210256e-06, "loss": 0.6134, "step": 4666 }, { "epoch": 0.59, "grad_norm": 0.777090426151362, "learning_rate": 8.240508724826287e-06, "loss": 0.5906, "step": 4667 }, { "epoch": 0.59, "grad_norm": 0.699591831918812, "learning_rate": 8.239723047504216e-06, "loss": 0.5446, "step": 4668 }, { "epoch": 0.59, "grad_norm": 0.5941513675925244, "learning_rate": 8.23893723227749e-06, "loss": 0.5582, "step": 4669 }, { "epoch": 0.59, "grad_norm": 0.8144842680043904, "learning_rate": 8.238151279179553e-06, "loss": 0.6324, "step": 4670 }, { "epoch": 0.6, "grad_norm": 0.785511863474483, "learning_rate": 8.237365188243865e-06, "loss": 0.6472, "step": 4671 }, { "epoch": 0.6, "grad_norm": 0.7463472417078749, "learning_rate": 8.236578959503884e-06, "loss": 0.6375, "step": 4672 }, { "epoch": 0.6, "grad_norm": 0.6446009794678811, "learning_rate": 8.235792592993082e-06, "loss": 0.5261, "step": 4673 }, { "epoch": 0.6, "grad_norm": 0.6573585926396419, "learning_rate": 8.235006088744929e-06, "loss": 0.5246, "step": 4674 }, { "epoch": 0.6, "grad_norm": 0.6582561884637631, "learning_rate": 8.234219446792904e-06, "loss": 0.5402, "step": 4675 }, { "epoch": 0.6, "grad_norm": 0.7633191541450485, "learning_rate": 8.233432667170494e-06, "loss": 0.5676, "step": 4676 }, { "epoch": 0.6, "grad_norm": 0.7939556371781813, "learning_rate": 8.232645749911185e-06, "loss": 0.6211, "step": 4677 }, { "epoch": 0.6, "grad_norm": 1.008517296327773, "learning_rate": 8.231858695048479e-06, "loss": 0.6066, "step": 4678 }, { "epoch": 0.6, "grad_norm": 0.5938877523332594, "learning_rate": 8.231071502615873e-06, "loss": 0.5422, "step": 4679 }, { "epoch": 0.6, "grad_norm": 0.5268837938765629, "learning_rate": 8.230284172646881e-06, "loss": 0.5053, "step": 4680 }, { "epoch": 0.6, "grad_norm": 0.5833814210144035, "learning_rate": 8.229496705175012e-06, "loss": 0.5064, "step": 4681 }, { "epoch": 0.6, "grad_norm": 0.5674897173502002, "learning_rate": 8.228709100233789e-06, "loss": 0.5552, "step": 4682 }, { "epoch": 0.6, "grad_norm": 0.5026327757855346, "learning_rate": 8.227921357856738e-06, "loss": 0.463, "step": 4683 }, { "epoch": 0.6, "grad_norm": 0.6045577676720874, "learning_rate": 8.22713347807739e-06, "loss": 0.4916, "step": 4684 }, { "epoch": 0.6, "grad_norm": 0.6782690097154433, "learning_rate": 8.22634546092928e-06, "loss": 0.5303, "step": 4685 }, { "epoch": 0.6, "grad_norm": 0.6143614157222634, "learning_rate": 8.225557306445956e-06, "loss": 0.5555, "step": 4686 }, { "epoch": 0.6, "grad_norm": 0.768168823360345, "learning_rate": 8.224769014660964e-06, "loss": 0.5589, "step": 4687 }, { "epoch": 0.6, "grad_norm": 0.6340062847522439, "learning_rate": 8.223980585607861e-06, "loss": 0.5835, "step": 4688 }, { "epoch": 0.6, "grad_norm": 0.6927697622048208, "learning_rate": 8.223192019320206e-06, "loss": 0.5845, "step": 4689 }, { "epoch": 0.6, "grad_norm": 0.5803929956917045, "learning_rate": 8.222403315831565e-06, "loss": 0.5398, "step": 4690 }, { "epoch": 0.6, "grad_norm": 0.6620576144270319, "learning_rate": 8.221614475175511e-06, "loss": 0.5646, "step": 4691 }, { "epoch": 0.6, "grad_norm": 0.6485821328922843, "learning_rate": 8.220825497385628e-06, "loss": 0.5433, "step": 4692 }, { "epoch": 0.6, "grad_norm": 0.758682620589759, "learning_rate": 8.220036382495494e-06, "loss": 0.5635, "step": 4693 }, { "epoch": 0.6, "grad_norm": 0.5472629883164765, "learning_rate": 8.219247130538702e-06, "loss": 0.51, "step": 4694 }, { "epoch": 0.6, "grad_norm": 0.7145393761055885, "learning_rate": 8.218457741548846e-06, "loss": 0.536, "step": 4695 }, { "epoch": 0.6, "grad_norm": 0.5911447726164596, "learning_rate": 8.217668215559528e-06, "loss": 0.5062, "step": 4696 }, { "epoch": 0.6, "grad_norm": 0.8202246234094444, "learning_rate": 8.216878552604356e-06, "loss": 0.6482, "step": 4697 }, { "epoch": 0.6, "grad_norm": 0.7048611594369557, "learning_rate": 8.216088752716945e-06, "loss": 0.5861, "step": 4698 }, { "epoch": 0.6, "grad_norm": 0.5855694900257165, "learning_rate": 8.215298815930912e-06, "loss": 0.5338, "step": 4699 }, { "epoch": 0.6, "grad_norm": 0.7717944753042869, "learning_rate": 8.214508742279884e-06, "loss": 0.661, "step": 4700 }, { "epoch": 0.6, "grad_norm": 0.5407674945319926, "learning_rate": 8.21371853179749e-06, "loss": 0.5083, "step": 4701 }, { "epoch": 0.6, "grad_norm": 0.7767660691205729, "learning_rate": 8.212928184517368e-06, "loss": 0.5481, "step": 4702 }, { "epoch": 0.6, "grad_norm": 0.5305472202996602, "learning_rate": 8.212137700473159e-06, "loss": 0.4873, "step": 4703 }, { "epoch": 0.6, "grad_norm": 0.8007867522899832, "learning_rate": 8.211347079698515e-06, "loss": 0.6342, "step": 4704 }, { "epoch": 0.6, "grad_norm": 0.5979449171900671, "learning_rate": 8.210556322227087e-06, "loss": 0.5908, "step": 4705 }, { "epoch": 0.6, "grad_norm": 0.7231031042642851, "learning_rate": 8.209765428092535e-06, "loss": 0.5683, "step": 4706 }, { "epoch": 0.6, "grad_norm": 0.7289049404047805, "learning_rate": 8.208974397328527e-06, "loss": 0.6381, "step": 4707 }, { "epoch": 0.6, "grad_norm": 0.7236172218250517, "learning_rate": 8.208183229968731e-06, "loss": 0.5567, "step": 4708 }, { "epoch": 0.6, "grad_norm": 0.7738967297665412, "learning_rate": 8.20739192604683e-06, "loss": 0.5994, "step": 4709 }, { "epoch": 0.6, "grad_norm": 0.7728467453810064, "learning_rate": 8.206600485596501e-06, "loss": 0.6049, "step": 4710 }, { "epoch": 0.6, "grad_norm": 0.8767782367508747, "learning_rate": 8.205808908651439e-06, "loss": 0.6213, "step": 4711 }, { "epoch": 0.6, "grad_norm": 0.7429710637834168, "learning_rate": 8.205017195245333e-06, "loss": 0.6152, "step": 4712 }, { "epoch": 0.6, "grad_norm": 0.5897784338241513, "learning_rate": 8.20422534541189e-06, "loss": 0.5291, "step": 4713 }, { "epoch": 0.6, "grad_norm": 0.5771387514393338, "learning_rate": 8.203433359184811e-06, "loss": 0.5108, "step": 4714 }, { "epoch": 0.6, "grad_norm": 0.8162661306598429, "learning_rate": 8.20264123659781e-06, "loss": 0.5798, "step": 4715 }, { "epoch": 0.6, "grad_norm": 0.6520075500920509, "learning_rate": 8.201848977684608e-06, "loss": 0.5716, "step": 4716 }, { "epoch": 0.6, "grad_norm": 0.9540425134830905, "learning_rate": 8.201056582478926e-06, "loss": 0.6701, "step": 4717 }, { "epoch": 0.6, "grad_norm": 0.5588207886438106, "learning_rate": 8.200264051014494e-06, "loss": 0.5205, "step": 4718 }, { "epoch": 0.6, "grad_norm": 1.0424848989956124, "learning_rate": 8.199471383325049e-06, "loss": 0.6383, "step": 4719 }, { "epoch": 0.6, "grad_norm": 0.6467697823264894, "learning_rate": 8.19867857944433e-06, "loss": 0.5593, "step": 4720 }, { "epoch": 0.6, "grad_norm": 0.8636816125209708, "learning_rate": 8.197885639406085e-06, "loss": 0.664, "step": 4721 }, { "epoch": 0.6, "grad_norm": 0.7121787750414755, "learning_rate": 8.197092563244067e-06, "loss": 0.6414, "step": 4722 }, { "epoch": 0.6, "grad_norm": 0.5867722333401326, "learning_rate": 8.196299350992036e-06, "loss": 0.5231, "step": 4723 }, { "epoch": 0.6, "grad_norm": 0.8165973481902713, "learning_rate": 8.195506002683756e-06, "loss": 0.6644, "step": 4724 }, { "epoch": 0.6, "grad_norm": 0.9932537819337868, "learning_rate": 8.194712518352997e-06, "loss": 0.5714, "step": 4725 }, { "epoch": 0.6, "grad_norm": 0.7202564353050699, "learning_rate": 8.193918898033533e-06, "loss": 0.6067, "step": 4726 }, { "epoch": 0.6, "grad_norm": 0.6208630082852726, "learning_rate": 8.19312514175915e-06, "loss": 0.5114, "step": 4727 }, { "epoch": 0.6, "grad_norm": 0.7230895260903912, "learning_rate": 8.192331249563632e-06, "loss": 0.6077, "step": 4728 }, { "epoch": 0.6, "grad_norm": 0.5531853516276057, "learning_rate": 8.191537221480775e-06, "loss": 0.4787, "step": 4729 }, { "epoch": 0.6, "grad_norm": 0.5858968568748244, "learning_rate": 8.190743057544376e-06, "loss": 0.5306, "step": 4730 }, { "epoch": 0.6, "grad_norm": 0.76533443112363, "learning_rate": 8.189948757788242e-06, "loss": 0.6236, "step": 4731 }, { "epoch": 0.6, "grad_norm": 0.6381047834579116, "learning_rate": 8.189154322246184e-06, "loss": 0.521, "step": 4732 }, { "epoch": 0.6, "grad_norm": 0.8094782169375087, "learning_rate": 8.188359750952015e-06, "loss": 0.622, "step": 4733 }, { "epoch": 0.6, "grad_norm": 0.6167793767933879, "learning_rate": 8.187565043939562e-06, "loss": 0.5403, "step": 4734 }, { "epoch": 0.6, "grad_norm": 0.8147174576845679, "learning_rate": 8.18677020124265e-06, "loss": 0.6379, "step": 4735 }, { "epoch": 0.6, "grad_norm": 1.14250842422233, "learning_rate": 8.185975222895115e-06, "loss": 0.6127, "step": 4736 }, { "epoch": 0.6, "grad_norm": 0.6121115758075073, "learning_rate": 8.185180108930795e-06, "loss": 0.5385, "step": 4737 }, { "epoch": 0.6, "grad_norm": 0.7378590011021329, "learning_rate": 8.184384859383537e-06, "loss": 0.5876, "step": 4738 }, { "epoch": 0.6, "grad_norm": 0.8116117048304031, "learning_rate": 8.18358947428719e-06, "loss": 0.6028, "step": 4739 }, { "epoch": 0.6, "grad_norm": 0.7827381595688337, "learning_rate": 8.182793953675613e-06, "loss": 0.5824, "step": 4740 }, { "epoch": 0.6, "grad_norm": 0.7529424867561774, "learning_rate": 8.181998297582668e-06, "loss": 0.6171, "step": 4741 }, { "epoch": 0.6, "grad_norm": 0.8637249480518574, "learning_rate": 8.181202506042224e-06, "loss": 0.6406, "step": 4742 }, { "epoch": 0.6, "grad_norm": 0.84913266113476, "learning_rate": 8.180406579088154e-06, "loss": 0.5968, "step": 4743 }, { "epoch": 0.6, "grad_norm": 0.8594606251697315, "learning_rate": 8.179610516754341e-06, "loss": 0.6496, "step": 4744 }, { "epoch": 0.6, "grad_norm": 0.5428580741176288, "learning_rate": 8.178814319074669e-06, "loss": 0.5436, "step": 4745 }, { "epoch": 0.6, "grad_norm": 0.6550157569932077, "learning_rate": 8.178017986083026e-06, "loss": 0.5443, "step": 4746 }, { "epoch": 0.6, "grad_norm": 0.7766491928055453, "learning_rate": 8.177221517813317e-06, "loss": 0.5871, "step": 4747 }, { "epoch": 0.6, "grad_norm": 0.7229487511566859, "learning_rate": 8.17642491429944e-06, "loss": 0.6646, "step": 4748 }, { "epoch": 0.61, "grad_norm": 0.8465769643127085, "learning_rate": 8.175628175575303e-06, "loss": 0.6456, "step": 4749 }, { "epoch": 0.61, "grad_norm": 0.9591417564551952, "learning_rate": 8.174831301674823e-06, "loss": 0.6841, "step": 4750 }, { "epoch": 0.61, "grad_norm": 0.6584652317916735, "learning_rate": 8.17403429263192e-06, "loss": 0.5447, "step": 4751 }, { "epoch": 0.61, "grad_norm": 0.6351246382884953, "learning_rate": 8.17323714848052e-06, "loss": 0.4827, "step": 4752 }, { "epoch": 0.61, "grad_norm": 0.6794344882886584, "learning_rate": 8.172439869254553e-06, "loss": 0.5947, "step": 4753 }, { "epoch": 0.61, "grad_norm": 0.6075077998455788, "learning_rate": 8.171642454987962e-06, "loss": 0.59, "step": 4754 }, { "epoch": 0.61, "grad_norm": 0.6062427020311038, "learning_rate": 8.170844905714684e-06, "loss": 0.5528, "step": 4755 }, { "epoch": 0.61, "grad_norm": 0.7944220427103551, "learning_rate": 8.170047221468673e-06, "loss": 0.6177, "step": 4756 }, { "epoch": 0.61, "grad_norm": 0.5500138044856991, "learning_rate": 8.16924940228388e-06, "loss": 0.4603, "step": 4757 }, { "epoch": 0.61, "grad_norm": 0.7967601294356719, "learning_rate": 8.16845144819427e-06, "loss": 0.6302, "step": 4758 }, { "epoch": 0.61, "grad_norm": 0.5609469815530869, "learning_rate": 8.167653359233803e-06, "loss": 0.5194, "step": 4759 }, { "epoch": 0.61, "grad_norm": 0.6899560275405495, "learning_rate": 8.166855135436458e-06, "loss": 0.5395, "step": 4760 }, { "epoch": 0.61, "grad_norm": 0.640058892806277, "learning_rate": 8.166056776836207e-06, "loss": 0.5185, "step": 4761 }, { "epoch": 0.61, "grad_norm": 0.7415652042202203, "learning_rate": 8.16525828346704e-06, "loss": 0.6, "step": 4762 }, { "epoch": 0.61, "grad_norm": 0.7140960492609035, "learning_rate": 8.16445965536294e-06, "loss": 0.5209, "step": 4763 }, { "epoch": 0.61, "grad_norm": 0.6429796093746752, "learning_rate": 8.163660892557904e-06, "loss": 0.5481, "step": 4764 }, { "epoch": 0.61, "grad_norm": 0.6797984855087086, "learning_rate": 8.162861995085934e-06, "loss": 0.571, "step": 4765 }, { "epoch": 0.61, "grad_norm": 0.6390299248708973, "learning_rate": 8.162062962981036e-06, "loss": 0.5633, "step": 4766 }, { "epoch": 0.61, "grad_norm": 0.5368528206457558, "learning_rate": 8.161263796277223e-06, "loss": 0.4686, "step": 4767 }, { "epoch": 0.61, "grad_norm": 0.9471618086438857, "learning_rate": 8.160464495008512e-06, "loss": 0.7009, "step": 4768 }, { "epoch": 0.61, "grad_norm": 0.6900754661852971, "learning_rate": 8.159665059208926e-06, "loss": 0.563, "step": 4769 }, { "epoch": 0.61, "grad_norm": 0.6665293492580482, "learning_rate": 8.158865488912498e-06, "loss": 0.5591, "step": 4770 }, { "epoch": 0.61, "grad_norm": 0.6719251737210051, "learning_rate": 8.158065784153258e-06, "loss": 0.5602, "step": 4771 }, { "epoch": 0.61, "grad_norm": 0.6652603466476381, "learning_rate": 8.15726594496525e-06, "loss": 0.5329, "step": 4772 }, { "epoch": 0.61, "grad_norm": 0.6329341996750119, "learning_rate": 8.156465971382518e-06, "loss": 0.5747, "step": 4773 }, { "epoch": 0.61, "grad_norm": 0.7178080928686216, "learning_rate": 8.155665863439118e-06, "loss": 0.5932, "step": 4774 }, { "epoch": 0.61, "grad_norm": 0.5875864628155599, "learning_rate": 8.154865621169106e-06, "loss": 0.5702, "step": 4775 }, { "epoch": 0.61, "grad_norm": 0.703005532628864, "learning_rate": 8.154065244606547e-06, "loss": 0.5411, "step": 4776 }, { "epoch": 0.61, "grad_norm": 0.7231004309489556, "learning_rate": 8.153264733785506e-06, "loss": 0.6325, "step": 4777 }, { "epoch": 0.61, "grad_norm": 0.5795419795889204, "learning_rate": 8.152464088740066e-06, "loss": 0.5075, "step": 4778 }, { "epoch": 0.61, "grad_norm": 0.975411624673703, "learning_rate": 8.151663309504301e-06, "loss": 0.6291, "step": 4779 }, { "epoch": 0.61, "grad_norm": 0.6978743100613516, "learning_rate": 8.1508623961123e-06, "loss": 0.5392, "step": 4780 }, { "epoch": 0.61, "grad_norm": 0.7707502929035336, "learning_rate": 8.150061348598158e-06, "loss": 0.6428, "step": 4781 }, { "epoch": 0.61, "grad_norm": 0.7465758153972254, "learning_rate": 8.14926016699597e-06, "loss": 0.55, "step": 4782 }, { "epoch": 0.61, "grad_norm": 0.6830603107167149, "learning_rate": 8.148458851339837e-06, "loss": 0.5929, "step": 4783 }, { "epoch": 0.61, "grad_norm": 0.6912109698268275, "learning_rate": 8.147657401663875e-06, "loss": 0.5323, "step": 4784 }, { "epoch": 0.61, "grad_norm": 0.6295671870593303, "learning_rate": 8.146855818002194e-06, "loss": 0.547, "step": 4785 }, { "epoch": 0.61, "grad_norm": 0.5388794576131567, "learning_rate": 8.146054100388917e-06, "loss": 0.497, "step": 4786 }, { "epoch": 0.61, "grad_norm": 0.561849712112106, "learning_rate": 8.145252248858172e-06, "loss": 0.4933, "step": 4787 }, { "epoch": 0.61, "grad_norm": 0.6238080545111447, "learning_rate": 8.144450263444089e-06, "loss": 0.4867, "step": 4788 }, { "epoch": 0.61, "grad_norm": 0.6100209454034814, "learning_rate": 8.143648144180804e-06, "loss": 0.5575, "step": 4789 }, { "epoch": 0.61, "grad_norm": 0.6869051626211036, "learning_rate": 8.142845891102466e-06, "loss": 0.5246, "step": 4790 }, { "epoch": 0.61, "grad_norm": 0.7185336470393379, "learning_rate": 8.14204350424322e-06, "loss": 0.5567, "step": 4791 }, { "epoch": 0.61, "grad_norm": 0.7722515859289103, "learning_rate": 8.141240983637224e-06, "loss": 0.5818, "step": 4792 }, { "epoch": 0.61, "grad_norm": 0.7579152851004997, "learning_rate": 8.140438329318636e-06, "loss": 0.6249, "step": 4793 }, { "epoch": 0.61, "grad_norm": 0.718007431908234, "learning_rate": 8.139635541321624e-06, "loss": 0.5953, "step": 4794 }, { "epoch": 0.61, "grad_norm": 0.6915713114399602, "learning_rate": 8.138832619680361e-06, "loss": 0.5483, "step": 4795 }, { "epoch": 0.61, "grad_norm": 0.826928163300088, "learning_rate": 8.138029564429022e-06, "loss": 0.6276, "step": 4796 }, { "epoch": 0.61, "grad_norm": 0.6821590812335693, "learning_rate": 8.137226375601793e-06, "loss": 0.5521, "step": 4797 }, { "epoch": 0.61, "grad_norm": 0.7539433385643475, "learning_rate": 8.136423053232863e-06, "loss": 0.6502, "step": 4798 }, { "epoch": 0.61, "grad_norm": 0.8280041133982782, "learning_rate": 8.135619597356426e-06, "loss": 0.5772, "step": 4799 }, { "epoch": 0.61, "grad_norm": 0.7759999067614988, "learning_rate": 8.134816008006681e-06, "loss": 0.6089, "step": 4800 }, { "epoch": 0.61, "grad_norm": 0.5967147981107758, "learning_rate": 8.134012285217838e-06, "loss": 0.5352, "step": 4801 }, { "epoch": 0.61, "grad_norm": 0.605268819827656, "learning_rate": 8.133208429024106e-06, "loss": 0.5731, "step": 4802 }, { "epoch": 0.61, "grad_norm": 0.7784713763142074, "learning_rate": 8.132404439459702e-06, "loss": 0.5946, "step": 4803 }, { "epoch": 0.61, "grad_norm": 0.6422953028824958, "learning_rate": 8.131600316558852e-06, "loss": 0.5425, "step": 4804 }, { "epoch": 0.61, "grad_norm": 0.9488262430014444, "learning_rate": 8.130796060355784e-06, "loss": 0.5849, "step": 4805 }, { "epoch": 0.61, "grad_norm": 0.760895490255571, "learning_rate": 8.129991670884733e-06, "loss": 0.6297, "step": 4806 }, { "epoch": 0.61, "grad_norm": 0.797257959647494, "learning_rate": 8.129187148179938e-06, "loss": 0.5763, "step": 4807 }, { "epoch": 0.61, "grad_norm": 0.6082905580279685, "learning_rate": 8.128382492275644e-06, "loss": 0.5502, "step": 4808 }, { "epoch": 0.61, "grad_norm": 0.6848614964915353, "learning_rate": 8.127577703206106e-06, "loss": 0.6208, "step": 4809 }, { "epoch": 0.61, "grad_norm": 0.9584029166809982, "learning_rate": 8.12677278100558e-06, "loss": 0.6336, "step": 4810 }, { "epoch": 0.61, "grad_norm": 0.7183629341738105, "learning_rate": 8.125967725708328e-06, "loss": 0.5912, "step": 4811 }, { "epoch": 0.61, "grad_norm": 0.6785159993765555, "learning_rate": 8.125162537348619e-06, "loss": 0.5499, "step": 4812 }, { "epoch": 0.61, "grad_norm": 0.8716339175559804, "learning_rate": 8.124357215960727e-06, "loss": 0.6399, "step": 4813 }, { "epoch": 0.61, "grad_norm": 0.6433837587231448, "learning_rate": 8.123551761578931e-06, "loss": 0.5176, "step": 4814 }, { "epoch": 0.61, "grad_norm": 0.6443099290245619, "learning_rate": 8.122746174237518e-06, "loss": 0.5517, "step": 4815 }, { "epoch": 0.61, "grad_norm": 0.8781899269543718, "learning_rate": 8.121940453970783e-06, "loss": 0.6455, "step": 4816 }, { "epoch": 0.61, "grad_norm": 0.6489200588777149, "learning_rate": 8.121134600813015e-06, "loss": 0.5344, "step": 4817 }, { "epoch": 0.61, "grad_norm": 0.6396351274396583, "learning_rate": 8.120328614798523e-06, "loss": 0.5311, "step": 4818 }, { "epoch": 0.61, "grad_norm": 0.7540203510791869, "learning_rate": 8.119522495961612e-06, "loss": 0.6351, "step": 4819 }, { "epoch": 0.61, "grad_norm": 0.6818248485252133, "learning_rate": 8.118716244336597e-06, "loss": 0.5111, "step": 4820 }, { "epoch": 0.61, "grad_norm": 0.6778605624073916, "learning_rate": 8.117909859957798e-06, "loss": 0.5092, "step": 4821 }, { "epoch": 0.61, "grad_norm": 0.6924205642203811, "learning_rate": 8.11710334285954e-06, "loss": 0.5422, "step": 4822 }, { "epoch": 0.61, "grad_norm": 0.5901394765861796, "learning_rate": 8.116296693076154e-06, "loss": 0.5939, "step": 4823 }, { "epoch": 0.61, "grad_norm": 0.8388903654904268, "learning_rate": 8.115489910641974e-06, "loss": 0.6289, "step": 4824 }, { "epoch": 0.61, "grad_norm": 0.6627781865118006, "learning_rate": 8.114682995591345e-06, "loss": 0.5172, "step": 4825 }, { "epoch": 0.61, "grad_norm": 0.5900976791764801, "learning_rate": 8.113875947958616e-06, "loss": 0.5203, "step": 4826 }, { "epoch": 0.61, "grad_norm": 0.6738291442510475, "learning_rate": 8.113068767778134e-06, "loss": 0.5559, "step": 4827 }, { "epoch": 0.62, "grad_norm": 0.566831010030039, "learning_rate": 8.112261455084265e-06, "loss": 0.4868, "step": 4828 }, { "epoch": 0.62, "grad_norm": 0.6321945141159364, "learning_rate": 8.111454009911372e-06, "loss": 0.563, "step": 4829 }, { "epoch": 0.62, "grad_norm": 0.635862688232433, "learning_rate": 8.110646432293824e-06, "loss": 0.5428, "step": 4830 }, { "epoch": 0.62, "grad_norm": 0.7577421420038001, "learning_rate": 8.109838722265997e-06, "loss": 0.6351, "step": 4831 }, { "epoch": 0.62, "grad_norm": 0.7286074056901038, "learning_rate": 8.109030879862274e-06, "loss": 0.5856, "step": 4832 }, { "epoch": 0.62, "grad_norm": 0.633054677055702, "learning_rate": 8.108222905117042e-06, "loss": 0.5816, "step": 4833 }, { "epoch": 0.62, "grad_norm": 0.6770640871206239, "learning_rate": 8.107414798064691e-06, "loss": 0.5155, "step": 4834 }, { "epoch": 0.62, "grad_norm": 0.7535503897137129, "learning_rate": 8.106606558739625e-06, "loss": 0.6265, "step": 4835 }, { "epoch": 0.62, "grad_norm": 0.7024832634606328, "learning_rate": 8.105798187176244e-06, "loss": 0.553, "step": 4836 }, { "epoch": 0.62, "grad_norm": 0.8754086666788395, "learning_rate": 8.104989683408958e-06, "loss": 0.633, "step": 4837 }, { "epoch": 0.62, "grad_norm": 0.7669117862269665, "learning_rate": 8.104181047472184e-06, "loss": 0.6301, "step": 4838 }, { "epoch": 0.62, "grad_norm": 0.9213269387667131, "learning_rate": 8.103372279400341e-06, "loss": 0.6266, "step": 4839 }, { "epoch": 0.62, "grad_norm": 0.6657568430405255, "learning_rate": 8.10256337922786e-06, "loss": 0.5311, "step": 4840 }, { "epoch": 0.62, "grad_norm": 0.7967500853464504, "learning_rate": 8.101754346989168e-06, "loss": 0.6059, "step": 4841 }, { "epoch": 0.62, "grad_norm": 0.597754659123826, "learning_rate": 8.100945182718705e-06, "loss": 0.5488, "step": 4842 }, { "epoch": 0.62, "grad_norm": 1.6263617389601803, "learning_rate": 8.100135886450916e-06, "loss": 0.651, "step": 4843 }, { "epoch": 0.62, "grad_norm": 0.7260252159977802, "learning_rate": 8.099326458220249e-06, "loss": 0.5937, "step": 4844 }, { "epoch": 0.62, "grad_norm": 1.2379358999520154, "learning_rate": 8.098516898061159e-06, "loss": 0.6083, "step": 4845 }, { "epoch": 0.62, "grad_norm": 0.5925252654182341, "learning_rate": 8.097707206008102e-06, "loss": 0.5397, "step": 4846 }, { "epoch": 0.62, "grad_norm": 0.6085251850615595, "learning_rate": 8.096897382095553e-06, "loss": 0.5117, "step": 4847 }, { "epoch": 0.62, "grad_norm": 0.7328260826979824, "learning_rate": 8.096087426357978e-06, "loss": 0.6174, "step": 4848 }, { "epoch": 0.62, "grad_norm": 0.6541158758973585, "learning_rate": 8.095277338829853e-06, "loss": 0.5069, "step": 4849 }, { "epoch": 0.62, "grad_norm": 0.5777393348145821, "learning_rate": 8.094467119545663e-06, "loss": 0.5154, "step": 4850 }, { "epoch": 0.62, "grad_norm": 0.7090133563575965, "learning_rate": 8.0936567685399e-06, "loss": 0.5341, "step": 4851 }, { "epoch": 0.62, "grad_norm": 0.7579490527579456, "learning_rate": 8.092846285847049e-06, "loss": 0.6279, "step": 4852 }, { "epoch": 0.62, "grad_norm": 0.6256002701802358, "learning_rate": 8.092035671501617e-06, "loss": 0.5118, "step": 4853 }, { "epoch": 0.62, "grad_norm": 0.6212915649896393, "learning_rate": 8.091224925538108e-06, "loss": 0.5432, "step": 4854 }, { "epoch": 0.62, "grad_norm": 0.5667830327736616, "learning_rate": 8.090414047991031e-06, "loss": 0.4996, "step": 4855 }, { "epoch": 0.62, "grad_norm": 0.7373447686309222, "learning_rate": 8.089603038894904e-06, "loss": 0.5686, "step": 4856 }, { "epoch": 0.62, "grad_norm": 0.9525439248404798, "learning_rate": 8.08879189828425e-06, "loss": 0.5795, "step": 4857 }, { "epoch": 0.62, "grad_norm": 0.7785048846145941, "learning_rate": 8.087980626193592e-06, "loss": 0.6069, "step": 4858 }, { "epoch": 0.62, "grad_norm": 0.6276731033295745, "learning_rate": 8.08716922265747e-06, "loss": 0.6157, "step": 4859 }, { "epoch": 0.62, "grad_norm": 0.6201341741204739, "learning_rate": 8.086357687710417e-06, "loss": 0.5355, "step": 4860 }, { "epoch": 0.62, "grad_norm": 0.8459649364744836, "learning_rate": 8.08554602138698e-06, "loss": 0.6384, "step": 4861 }, { "epoch": 0.62, "grad_norm": 0.7000963566777608, "learning_rate": 8.08473422372171e-06, "loss": 0.6873, "step": 4862 }, { "epoch": 0.62, "grad_norm": 0.6653633755268963, "learning_rate": 8.08392229474916e-06, "loss": 0.5585, "step": 4863 }, { "epoch": 0.62, "grad_norm": 0.6239248567406012, "learning_rate": 8.083110234503895e-06, "loss": 0.5455, "step": 4864 }, { "epoch": 0.62, "grad_norm": 0.7981524243795168, "learning_rate": 8.082298043020478e-06, "loss": 0.6161, "step": 4865 }, { "epoch": 0.62, "grad_norm": 0.7162597540883014, "learning_rate": 8.081485720333482e-06, "loss": 0.5786, "step": 4866 }, { "epoch": 0.62, "grad_norm": 0.6799110198446136, "learning_rate": 8.080673266477488e-06, "loss": 0.5397, "step": 4867 }, { "epoch": 0.62, "grad_norm": 0.7345853282228795, "learning_rate": 8.079860681487076e-06, "loss": 0.59, "step": 4868 }, { "epoch": 0.62, "grad_norm": 0.6141263424194727, "learning_rate": 8.079047965396839e-06, "loss": 0.5624, "step": 4869 }, { "epoch": 0.62, "grad_norm": 0.5948313931876724, "learning_rate": 8.078235118241365e-06, "loss": 0.5591, "step": 4870 }, { "epoch": 0.62, "grad_norm": 0.8005217658260888, "learning_rate": 8.077422140055261e-06, "loss": 0.6141, "step": 4871 }, { "epoch": 0.62, "grad_norm": 0.5711097481074564, "learning_rate": 8.07660903087313e-06, "loss": 0.5394, "step": 4872 }, { "epoch": 0.62, "grad_norm": 0.7340286153341028, "learning_rate": 8.075795790729586e-06, "loss": 0.6156, "step": 4873 }, { "epoch": 0.62, "grad_norm": 0.7820995091205901, "learning_rate": 8.074982419659244e-06, "loss": 0.5836, "step": 4874 }, { "epoch": 0.62, "grad_norm": 0.6910957990008223, "learning_rate": 8.074168917696724e-06, "loss": 0.5851, "step": 4875 }, { "epoch": 0.62, "grad_norm": 0.8480003491441266, "learning_rate": 8.073355284876658e-06, "loss": 0.5989, "step": 4876 }, { "epoch": 0.62, "grad_norm": 0.8737900651445702, "learning_rate": 8.072541521233679e-06, "loss": 0.6439, "step": 4877 }, { "epoch": 0.62, "grad_norm": 0.8007813808033702, "learning_rate": 8.071727626802424e-06, "loss": 0.6284, "step": 4878 }, { "epoch": 0.62, "grad_norm": 0.6936560572133352, "learning_rate": 8.070913601617543e-06, "loss": 0.5069, "step": 4879 }, { "epoch": 0.62, "grad_norm": 0.7981425101843691, "learning_rate": 8.07009944571368e-06, "loss": 0.6347, "step": 4880 }, { "epoch": 0.62, "grad_norm": 0.5779411834405241, "learning_rate": 8.069285159125496e-06, "loss": 0.5422, "step": 4881 }, { "epoch": 0.62, "grad_norm": 0.8190092659255641, "learning_rate": 8.068470741887651e-06, "loss": 0.6454, "step": 4882 }, { "epoch": 0.62, "grad_norm": 0.804109598707946, "learning_rate": 8.067656194034811e-06, "loss": 0.5844, "step": 4883 }, { "epoch": 0.62, "grad_norm": 0.5820104262480262, "learning_rate": 8.06684151560165e-06, "loss": 0.5529, "step": 4884 }, { "epoch": 0.62, "grad_norm": 0.6473168890837421, "learning_rate": 8.066026706622847e-06, "loss": 0.5917, "step": 4885 }, { "epoch": 0.62, "grad_norm": 0.7896002476119041, "learning_rate": 8.065211767133084e-06, "loss": 0.6267, "step": 4886 }, { "epoch": 0.62, "grad_norm": 0.8729663423341352, "learning_rate": 8.06439669716705e-06, "loss": 0.683, "step": 4887 }, { "epoch": 0.62, "grad_norm": 0.8224710341287688, "learning_rate": 8.063581496759443e-06, "loss": 0.6236, "step": 4888 }, { "epoch": 0.62, "grad_norm": 0.6662888533544024, "learning_rate": 8.06276616594496e-06, "loss": 0.5263, "step": 4889 }, { "epoch": 0.62, "grad_norm": 0.9789375430210324, "learning_rate": 8.061950704758307e-06, "loss": 0.6493, "step": 4890 }, { "epoch": 0.62, "grad_norm": 0.8049248495167562, "learning_rate": 8.061135113234199e-06, "loss": 0.5939, "step": 4891 }, { "epoch": 0.62, "grad_norm": 0.683245514292312, "learning_rate": 8.060319391407352e-06, "loss": 0.5611, "step": 4892 }, { "epoch": 0.62, "grad_norm": 0.5641700951459367, "learning_rate": 8.059503539312487e-06, "loss": 0.5655, "step": 4893 }, { "epoch": 0.62, "grad_norm": 0.5811884318442994, "learning_rate": 8.058687556984333e-06, "loss": 0.5113, "step": 4894 }, { "epoch": 0.62, "grad_norm": 0.5629376793973534, "learning_rate": 8.057871444457624e-06, "loss": 0.4969, "step": 4895 }, { "epoch": 0.62, "grad_norm": 0.6015200960107091, "learning_rate": 8.057055201767098e-06, "loss": 0.5512, "step": 4896 }, { "epoch": 0.62, "grad_norm": 0.6332036574827241, "learning_rate": 8.056238828947503e-06, "loss": 0.5584, "step": 4897 }, { "epoch": 0.62, "grad_norm": 0.5763057480957622, "learning_rate": 8.055422326033586e-06, "loss": 0.5248, "step": 4898 }, { "epoch": 0.62, "grad_norm": 0.8874959396134705, "learning_rate": 8.054605693060106e-06, "loss": 0.6002, "step": 4899 }, { "epoch": 0.62, "grad_norm": 0.6794838650061711, "learning_rate": 8.053788930061822e-06, "loss": 0.5925, "step": 4900 }, { "epoch": 0.62, "grad_norm": 0.8484945740126925, "learning_rate": 8.052972037073501e-06, "loss": 0.6414, "step": 4901 }, { "epoch": 0.62, "grad_norm": 0.612939219459935, "learning_rate": 8.052155014129916e-06, "loss": 0.5461, "step": 4902 }, { "epoch": 0.62, "grad_norm": 0.7431409948859721, "learning_rate": 8.051337861265847e-06, "loss": 0.5727, "step": 4903 }, { "epoch": 0.62, "grad_norm": 0.5868836082094497, "learning_rate": 8.050520578516074e-06, "loss": 0.5204, "step": 4904 }, { "epoch": 0.62, "grad_norm": 0.6702798902963087, "learning_rate": 8.04970316591539e-06, "loss": 0.5689, "step": 4905 }, { "epoch": 0.63, "grad_norm": 0.7830835168618177, "learning_rate": 8.048885623498587e-06, "loss": 0.6054, "step": 4906 }, { "epoch": 0.63, "grad_norm": 0.7448351170870566, "learning_rate": 8.048067951300465e-06, "loss": 0.6233, "step": 4907 }, { "epoch": 0.63, "grad_norm": 0.6486517744835938, "learning_rate": 8.04725014935583e-06, "loss": 0.554, "step": 4908 }, { "epoch": 0.63, "grad_norm": 0.6722504772304511, "learning_rate": 8.046432217699496e-06, "loss": 0.5193, "step": 4909 }, { "epoch": 0.63, "grad_norm": 0.8182603359914264, "learning_rate": 8.045614156366276e-06, "loss": 0.6271, "step": 4910 }, { "epoch": 0.63, "grad_norm": 0.6446605809609107, "learning_rate": 8.044795965390995e-06, "loss": 0.5534, "step": 4911 }, { "epoch": 0.63, "grad_norm": 0.7035520761978654, "learning_rate": 8.043977644808478e-06, "loss": 0.5389, "step": 4912 }, { "epoch": 0.63, "grad_norm": 0.827594038147539, "learning_rate": 8.04315919465356e-06, "loss": 0.6375, "step": 4913 }, { "epoch": 0.63, "grad_norm": 0.7374695926988003, "learning_rate": 8.04234061496108e-06, "loss": 0.6404, "step": 4914 }, { "epoch": 0.63, "grad_norm": 0.5693042303910528, "learning_rate": 8.041521905765883e-06, "loss": 0.5619, "step": 4915 }, { "epoch": 0.63, "grad_norm": 0.5875885745632737, "learning_rate": 8.040703067102816e-06, "loss": 0.5437, "step": 4916 }, { "epoch": 0.63, "grad_norm": 0.7185821643081933, "learning_rate": 8.039884099006739e-06, "loss": 0.5417, "step": 4917 }, { "epoch": 0.63, "grad_norm": 0.6892261854689584, "learning_rate": 8.039065001512508e-06, "loss": 0.5838, "step": 4918 }, { "epoch": 0.63, "grad_norm": 0.6964076157822582, "learning_rate": 8.03824577465499e-06, "loss": 0.5413, "step": 4919 }, { "epoch": 0.63, "grad_norm": 0.7527568758841825, "learning_rate": 8.037426418469058e-06, "loss": 0.6089, "step": 4920 }, { "epoch": 0.63, "grad_norm": 0.7509769115590305, "learning_rate": 8.036606932989592e-06, "loss": 0.5368, "step": 4921 }, { "epoch": 0.63, "grad_norm": 0.5815321045061785, "learning_rate": 8.03578731825147e-06, "loss": 0.4913, "step": 4922 }, { "epoch": 0.63, "grad_norm": 0.7236216992126074, "learning_rate": 8.034967574289587e-06, "loss": 0.6146, "step": 4923 }, { "epoch": 0.63, "grad_norm": 0.7644428638996943, "learning_rate": 8.03414770113883e-06, "loss": 0.528, "step": 4924 }, { "epoch": 0.63, "grad_norm": 0.6993079172656466, "learning_rate": 8.033327698834101e-06, "loss": 0.5754, "step": 4925 }, { "epoch": 0.63, "grad_norm": 0.7313450764515096, "learning_rate": 8.032507567410304e-06, "loss": 0.6255, "step": 4926 }, { "epoch": 0.63, "grad_norm": 0.761825300077517, "learning_rate": 8.031687306902352e-06, "loss": 0.6288, "step": 4927 }, { "epoch": 0.63, "grad_norm": 0.6141230843182297, "learning_rate": 8.030866917345159e-06, "loss": 0.5166, "step": 4928 }, { "epoch": 0.63, "grad_norm": 0.6644433915350286, "learning_rate": 8.030046398773647e-06, "loss": 0.5611, "step": 4929 }, { "epoch": 0.63, "grad_norm": 0.573637497473543, "learning_rate": 8.029225751222743e-06, "loss": 0.5396, "step": 4930 }, { "epoch": 0.63, "grad_norm": 0.6878188589679671, "learning_rate": 8.028404974727377e-06, "loss": 0.5248, "step": 4931 }, { "epoch": 0.63, "grad_norm": 0.8069764538185773, "learning_rate": 8.02758406932249e-06, "loss": 0.6407, "step": 4932 }, { "epoch": 0.63, "grad_norm": 0.6955927656844051, "learning_rate": 8.026763035043021e-06, "loss": 0.5512, "step": 4933 }, { "epoch": 0.63, "grad_norm": 0.7575647114305473, "learning_rate": 8.025941871923926e-06, "loss": 0.5891, "step": 4934 }, { "epoch": 0.63, "grad_norm": 0.6995656652211012, "learning_rate": 8.025120580000153e-06, "loss": 0.5557, "step": 4935 }, { "epoch": 0.63, "grad_norm": 0.7235148605292684, "learning_rate": 8.024299159306663e-06, "loss": 0.6974, "step": 4936 }, { "epoch": 0.63, "grad_norm": 0.6035609518052024, "learning_rate": 8.023477609878422e-06, "loss": 0.5044, "step": 4937 }, { "epoch": 0.63, "grad_norm": 0.5910437522974566, "learning_rate": 8.022655931750403e-06, "loss": 0.5683, "step": 4938 }, { "epoch": 0.63, "grad_norm": 0.6474405146828806, "learning_rate": 8.021834124957578e-06, "loss": 0.5213, "step": 4939 }, { "epoch": 0.63, "grad_norm": 0.8086742935312414, "learning_rate": 8.021012189534931e-06, "loss": 0.6436, "step": 4940 }, { "epoch": 0.63, "grad_norm": 0.616790651415157, "learning_rate": 8.020190125517451e-06, "loss": 0.5632, "step": 4941 }, { "epoch": 0.63, "grad_norm": 0.9826153405074254, "learning_rate": 8.019367932940126e-06, "loss": 0.6423, "step": 4942 }, { "epoch": 0.63, "grad_norm": 0.7436441861685466, "learning_rate": 8.018545611837959e-06, "loss": 0.6507, "step": 4943 }, { "epoch": 0.63, "grad_norm": 1.4778030251635295, "learning_rate": 8.017723162245948e-06, "loss": 0.6036, "step": 4944 }, { "epoch": 0.63, "grad_norm": 0.6638589947665885, "learning_rate": 8.016900584199106e-06, "loss": 0.5245, "step": 4945 }, { "epoch": 0.63, "grad_norm": 0.580197713064733, "learning_rate": 8.016077877732448e-06, "loss": 0.5446, "step": 4946 }, { "epoch": 0.63, "grad_norm": 0.6682971371288308, "learning_rate": 8.015255042880993e-06, "loss": 0.5816, "step": 4947 }, { "epoch": 0.63, "grad_norm": 0.7183818744936854, "learning_rate": 8.014432079679766e-06, "loss": 0.5814, "step": 4948 }, { "epoch": 0.63, "grad_norm": 0.685527682626321, "learning_rate": 8.013608988163797e-06, "loss": 0.6078, "step": 4949 }, { "epoch": 0.63, "grad_norm": 0.59734172242765, "learning_rate": 8.012785768368125e-06, "loss": 0.5283, "step": 4950 }, { "epoch": 0.63, "grad_norm": 0.6447979255046242, "learning_rate": 8.011962420327788e-06, "loss": 0.604, "step": 4951 }, { "epoch": 0.63, "grad_norm": 0.809380976290288, "learning_rate": 8.011138944077838e-06, "loss": 0.6925, "step": 4952 }, { "epoch": 0.63, "grad_norm": 0.7869086565648559, "learning_rate": 8.010315339653325e-06, "loss": 0.6037, "step": 4953 }, { "epoch": 0.63, "grad_norm": 0.7987426487144512, "learning_rate": 8.009491607089305e-06, "loss": 0.6005, "step": 4954 }, { "epoch": 0.63, "grad_norm": 0.7516274719269177, "learning_rate": 8.008667746420849e-06, "loss": 0.6318, "step": 4955 }, { "epoch": 0.63, "grad_norm": 0.7740498530156996, "learning_rate": 8.007843757683019e-06, "loss": 0.598, "step": 4956 }, { "epoch": 0.63, "grad_norm": 0.6904989824350313, "learning_rate": 8.00701964091089e-06, "loss": 0.5592, "step": 4957 }, { "epoch": 0.63, "grad_norm": 0.6199203109786972, "learning_rate": 8.006195396139545e-06, "loss": 0.5515, "step": 4958 }, { "epoch": 0.63, "grad_norm": 0.8311154963488638, "learning_rate": 8.00537102340407e-06, "loss": 0.6265, "step": 4959 }, { "epoch": 0.63, "grad_norm": 0.673649226032902, "learning_rate": 8.004546522739553e-06, "loss": 0.5239, "step": 4960 }, { "epoch": 0.63, "grad_norm": 0.9741430450093904, "learning_rate": 8.003721894181092e-06, "loss": 0.596, "step": 4961 }, { "epoch": 0.63, "grad_norm": 0.7731955285755833, "learning_rate": 8.00289713776379e-06, "loss": 0.6207, "step": 4962 }, { "epoch": 0.63, "grad_norm": 0.8706968197408063, "learning_rate": 8.00207225352275e-06, "loss": 0.6353, "step": 4963 }, { "epoch": 0.63, "grad_norm": 0.8210021030467289, "learning_rate": 8.001247241493089e-06, "loss": 0.6399, "step": 4964 }, { "epoch": 0.63, "grad_norm": 0.6537251604318643, "learning_rate": 8.000422101709923e-06, "loss": 0.5753, "step": 4965 }, { "epoch": 0.63, "grad_norm": 0.9390043673291344, "learning_rate": 7.999596834208377e-06, "loss": 0.6659, "step": 4966 }, { "epoch": 0.63, "grad_norm": 0.963241817320959, "learning_rate": 7.998771439023578e-06, "loss": 0.6748, "step": 4967 }, { "epoch": 0.63, "grad_norm": 0.789801755202053, "learning_rate": 7.997945916190661e-06, "loss": 0.6539, "step": 4968 }, { "epoch": 0.63, "grad_norm": 0.7281406478570932, "learning_rate": 7.99712026574477e-06, "loss": 0.5985, "step": 4969 }, { "epoch": 0.63, "grad_norm": 0.7085923490566987, "learning_rate": 7.996294487721041e-06, "loss": 0.5913, "step": 4970 }, { "epoch": 0.63, "grad_norm": 0.6460091542718994, "learning_rate": 7.995468582154634e-06, "loss": 0.4897, "step": 4971 }, { "epoch": 0.63, "grad_norm": 0.624189856220877, "learning_rate": 7.994642549080702e-06, "loss": 0.5663, "step": 4972 }, { "epoch": 0.63, "grad_norm": 0.7484498850374535, "learning_rate": 7.993816388534404e-06, "loss": 0.5894, "step": 4973 }, { "epoch": 0.63, "grad_norm": 0.81555407216843, "learning_rate": 7.99299010055091e-06, "loss": 0.5471, "step": 4974 }, { "epoch": 0.63, "grad_norm": 0.574974637473329, "learning_rate": 7.992163685165393e-06, "loss": 0.5283, "step": 4975 }, { "epoch": 0.63, "grad_norm": 0.6329118697095062, "learning_rate": 7.991337142413029e-06, "loss": 0.5339, "step": 4976 }, { "epoch": 0.63, "grad_norm": 0.6164356332347005, "learning_rate": 7.990510472329e-06, "loss": 0.5469, "step": 4977 }, { "epoch": 0.63, "grad_norm": 0.6144438291787048, "learning_rate": 7.989683674948498e-06, "loss": 0.4932, "step": 4978 }, { "epoch": 0.63, "grad_norm": 0.8879003383011629, "learning_rate": 7.988856750306716e-06, "loss": 0.6502, "step": 4979 }, { "epoch": 0.63, "grad_norm": 0.7075341439946503, "learning_rate": 7.988029698438853e-06, "loss": 0.5467, "step": 4980 }, { "epoch": 0.63, "grad_norm": 0.651597186699634, "learning_rate": 7.987202519380114e-06, "loss": 0.5461, "step": 4981 }, { "epoch": 0.63, "grad_norm": 0.8258889716800095, "learning_rate": 7.98637521316571e-06, "loss": 0.6369, "step": 4982 }, { "epoch": 0.63, "grad_norm": 0.6822670076700751, "learning_rate": 7.985547779830856e-06, "loss": 0.5853, "step": 4983 }, { "epoch": 0.63, "grad_norm": 0.6908581928240837, "learning_rate": 7.984720219410773e-06, "loss": 0.5748, "step": 4984 }, { "epoch": 0.64, "grad_norm": 0.9160495837039937, "learning_rate": 7.983892531940687e-06, "loss": 0.6137, "step": 4985 }, { "epoch": 0.64, "grad_norm": 0.5555883207325232, "learning_rate": 7.983064717455832e-06, "loss": 0.4661, "step": 4986 }, { "epoch": 0.64, "grad_norm": 0.6066013306324208, "learning_rate": 7.982236775991445e-06, "loss": 0.5452, "step": 4987 }, { "epoch": 0.64, "grad_norm": 0.6284802021881586, "learning_rate": 7.981408707582769e-06, "loss": 0.5563, "step": 4988 }, { "epoch": 0.64, "grad_norm": 0.6886870793431915, "learning_rate": 7.98058051226505e-06, "loss": 0.6052, "step": 4989 }, { "epoch": 0.64, "grad_norm": 0.6990423918403783, "learning_rate": 7.979752190073543e-06, "loss": 0.551, "step": 4990 }, { "epoch": 0.64, "grad_norm": 0.7674093938950507, "learning_rate": 7.978923741043508e-06, "loss": 0.5451, "step": 4991 }, { "epoch": 0.64, "grad_norm": 2.199156064081174, "learning_rate": 7.978095165210209e-06, "loss": 0.6322, "step": 4992 }, { "epoch": 0.64, "grad_norm": 0.6908099319990278, "learning_rate": 7.977266462608915e-06, "loss": 0.5999, "step": 4993 }, { "epoch": 0.64, "grad_norm": 0.6431466981335197, "learning_rate": 7.976437633274901e-06, "loss": 0.5218, "step": 4994 }, { "epoch": 0.64, "grad_norm": 0.6817665191447893, "learning_rate": 7.975608677243449e-06, "loss": 0.5663, "step": 4995 }, { "epoch": 0.64, "grad_norm": 0.6648658984445288, "learning_rate": 7.974779594549844e-06, "loss": 0.5393, "step": 4996 }, { "epoch": 0.64, "grad_norm": 0.6637238420430308, "learning_rate": 7.973950385229378e-06, "loss": 0.5556, "step": 4997 }, { "epoch": 0.64, "grad_norm": 0.8176644553156135, "learning_rate": 7.973121049317349e-06, "loss": 0.6779, "step": 4998 }, { "epoch": 0.64, "grad_norm": 0.6876679393660092, "learning_rate": 7.972291586849054e-06, "loss": 0.5757, "step": 4999 }, { "epoch": 0.64, "grad_norm": 0.6717052628132845, "learning_rate": 7.971461997859808e-06, "loss": 0.5094, "step": 5000 }, { "epoch": 0.64, "grad_norm": 0.7194117786502368, "learning_rate": 7.970632282384918e-06, "loss": 0.5393, "step": 5001 }, { "epoch": 0.64, "grad_norm": 1.0149557103812121, "learning_rate": 7.969802440459704e-06, "loss": 0.599, "step": 5002 }, { "epoch": 0.64, "grad_norm": 0.6942619217568177, "learning_rate": 7.968972472119491e-06, "loss": 0.5802, "step": 5003 }, { "epoch": 0.64, "grad_norm": 0.5765078417775431, "learning_rate": 7.968142377399608e-06, "loss": 0.4968, "step": 5004 }, { "epoch": 0.64, "grad_norm": 0.811633019565186, "learning_rate": 7.967312156335389e-06, "loss": 0.6265, "step": 5005 }, { "epoch": 0.64, "grad_norm": 0.8410833284701286, "learning_rate": 7.966481808962174e-06, "loss": 0.6364, "step": 5006 }, { "epoch": 0.64, "grad_norm": 0.7550709211650153, "learning_rate": 7.965651335315305e-06, "loss": 0.5816, "step": 5007 }, { "epoch": 0.64, "grad_norm": 0.6682562657334422, "learning_rate": 7.96482073543014e-06, "loss": 0.5109, "step": 5008 }, { "epoch": 0.64, "grad_norm": 0.7032653731154704, "learning_rate": 7.963990009342026e-06, "loss": 0.5968, "step": 5009 }, { "epoch": 0.64, "grad_norm": 0.6804902791575567, "learning_rate": 7.96315915708633e-06, "loss": 0.5686, "step": 5010 }, { "epoch": 0.64, "grad_norm": 0.7233130333012316, "learning_rate": 7.96232817869842e-06, "loss": 0.6483, "step": 5011 }, { "epoch": 0.64, "grad_norm": 0.5943661914938109, "learning_rate": 7.961497074213664e-06, "loss": 0.5234, "step": 5012 }, { "epoch": 0.64, "grad_norm": 0.7687568441710754, "learning_rate": 7.96066584366744e-06, "loss": 0.6158, "step": 5013 }, { "epoch": 0.64, "grad_norm": 0.7907268912226469, "learning_rate": 7.959834487095135e-06, "loss": 0.5825, "step": 5014 }, { "epoch": 0.64, "grad_norm": 0.8253108962829002, "learning_rate": 7.959003004532132e-06, "loss": 0.6264, "step": 5015 }, { "epoch": 0.64, "grad_norm": 0.8314557847227163, "learning_rate": 7.958171396013827e-06, "loss": 0.6159, "step": 5016 }, { "epoch": 0.64, "grad_norm": 0.6070493284253823, "learning_rate": 7.957339661575618e-06, "loss": 0.5816, "step": 5017 }, { "epoch": 0.64, "grad_norm": 0.8312694095835157, "learning_rate": 7.956507801252912e-06, "loss": 0.6823, "step": 5018 }, { "epoch": 0.64, "grad_norm": 0.7910304487107016, "learning_rate": 7.955675815081114e-06, "loss": 0.5786, "step": 5019 }, { "epoch": 0.64, "grad_norm": 0.6418403797533321, "learning_rate": 7.954843703095644e-06, "loss": 0.571, "step": 5020 }, { "epoch": 0.64, "grad_norm": 0.9557562828315895, "learning_rate": 7.954011465331917e-06, "loss": 0.5964, "step": 5021 }, { "epoch": 0.64, "grad_norm": 0.7629141335309464, "learning_rate": 7.953179101825364e-06, "loss": 0.5923, "step": 5022 }, { "epoch": 0.64, "grad_norm": 0.5695493872500682, "learning_rate": 7.952346612611413e-06, "loss": 0.5009, "step": 5023 }, { "epoch": 0.64, "grad_norm": 0.5967780145596528, "learning_rate": 7.951513997725501e-06, "loss": 0.5137, "step": 5024 }, { "epoch": 0.64, "grad_norm": 0.6046722760236224, "learning_rate": 7.95068125720307e-06, "loss": 0.5109, "step": 5025 }, { "epoch": 0.64, "grad_norm": 0.9257658633257344, "learning_rate": 7.949848391079566e-06, "loss": 0.6057, "step": 5026 }, { "epoch": 0.64, "grad_norm": 0.7817539946449922, "learning_rate": 7.949015399390443e-06, "loss": 0.637, "step": 5027 }, { "epoch": 0.64, "grad_norm": 0.5977205782331978, "learning_rate": 7.94818228217116e-06, "loss": 0.511, "step": 5028 }, { "epoch": 0.64, "grad_norm": 0.845289093357311, "learning_rate": 7.947349039457175e-06, "loss": 0.6272, "step": 5029 }, { "epoch": 0.64, "grad_norm": 0.6242821350284501, "learning_rate": 7.946515671283962e-06, "loss": 0.5621, "step": 5030 }, { "epoch": 0.64, "grad_norm": 0.740643890292027, "learning_rate": 7.945682177686992e-06, "loss": 0.5391, "step": 5031 }, { "epoch": 0.64, "grad_norm": 0.6881059538004348, "learning_rate": 7.944848558701743e-06, "loss": 0.6137, "step": 5032 }, { "epoch": 0.64, "grad_norm": 0.5642095014106421, "learning_rate": 7.944014814363703e-06, "loss": 0.514, "step": 5033 }, { "epoch": 0.64, "grad_norm": 0.6670245561237913, "learning_rate": 7.943180944708361e-06, "loss": 0.6139, "step": 5034 }, { "epoch": 0.64, "grad_norm": 0.7789270633283152, "learning_rate": 7.942346949771211e-06, "loss": 0.5987, "step": 5035 }, { "epoch": 0.64, "grad_norm": 0.572422754598143, "learning_rate": 7.941512829587753e-06, "loss": 0.5582, "step": 5036 }, { "epoch": 0.64, "grad_norm": 0.5562236654922782, "learning_rate": 7.940678584193492e-06, "loss": 0.5599, "step": 5037 }, { "epoch": 0.64, "grad_norm": 0.6948678762479501, "learning_rate": 7.939844213623942e-06, "loss": 0.5732, "step": 5038 }, { "epoch": 0.64, "grad_norm": 0.7105738402823709, "learning_rate": 7.939009717914619e-06, "loss": 0.6254, "step": 5039 }, { "epoch": 0.64, "grad_norm": 0.6884141398825876, "learning_rate": 7.938175097101043e-06, "loss": 0.5349, "step": 5040 }, { "epoch": 0.64, "grad_norm": 0.7075112009203091, "learning_rate": 7.937340351218743e-06, "loss": 0.5585, "step": 5041 }, { "epoch": 0.64, "grad_norm": 0.5841872756180618, "learning_rate": 7.936505480303251e-06, "loss": 0.5211, "step": 5042 }, { "epoch": 0.64, "grad_norm": 0.7582660928114365, "learning_rate": 7.935670484390103e-06, "loss": 0.5876, "step": 5043 }, { "epoch": 0.64, "grad_norm": 0.6394882090662029, "learning_rate": 7.934835363514843e-06, "loss": 0.564, "step": 5044 }, { "epoch": 0.64, "grad_norm": 0.5738471337815415, "learning_rate": 7.934000117713021e-06, "loss": 0.5109, "step": 5045 }, { "epoch": 0.64, "grad_norm": 0.6522987781055405, "learning_rate": 7.933164747020191e-06, "loss": 0.5848, "step": 5046 }, { "epoch": 0.64, "grad_norm": 0.7265418727142777, "learning_rate": 7.932329251471911e-06, "loss": 0.509, "step": 5047 }, { "epoch": 0.64, "grad_norm": 0.7495131456050885, "learning_rate": 7.931493631103743e-06, "loss": 0.6102, "step": 5048 }, { "epoch": 0.64, "grad_norm": 0.7328420990163743, "learning_rate": 7.930657885951261e-06, "loss": 0.5479, "step": 5049 }, { "epoch": 0.64, "grad_norm": 0.620422845891505, "learning_rate": 7.929822016050034e-06, "loss": 0.5732, "step": 5050 }, { "epoch": 0.64, "grad_norm": 0.7642114420941445, "learning_rate": 7.92898602143565e-06, "loss": 0.6453, "step": 5051 }, { "epoch": 0.64, "grad_norm": 0.7367152211775877, "learning_rate": 7.92814990214369e-06, "loss": 0.5673, "step": 5052 }, { "epoch": 0.64, "grad_norm": 0.53367041818719, "learning_rate": 7.927313658209744e-06, "loss": 0.5007, "step": 5053 }, { "epoch": 0.64, "grad_norm": 0.7977123909340416, "learning_rate": 7.926477289669411e-06, "loss": 0.6518, "step": 5054 }, { "epoch": 0.64, "grad_norm": 0.8386176978568222, "learning_rate": 7.925640796558291e-06, "loss": 0.6049, "step": 5055 }, { "epoch": 0.64, "grad_norm": 1.7815541597991627, "learning_rate": 7.924804178911993e-06, "loss": 0.579, "step": 5056 }, { "epoch": 0.64, "grad_norm": 0.7325344772733176, "learning_rate": 7.923967436766126e-06, "loss": 0.55, "step": 5057 }, { "epoch": 0.64, "grad_norm": 0.7146358864359678, "learning_rate": 7.92313057015631e-06, "loss": 0.6017, "step": 5058 }, { "epoch": 0.64, "grad_norm": 0.6840612327982274, "learning_rate": 7.922293579118165e-06, "loss": 0.5505, "step": 5059 }, { "epoch": 0.64, "grad_norm": 0.5578037891999774, "learning_rate": 7.921456463687323e-06, "loss": 0.5551, "step": 5060 }, { "epoch": 0.64, "grad_norm": 0.7035750119166656, "learning_rate": 7.920619223899413e-06, "loss": 0.5665, "step": 5061 }, { "epoch": 0.64, "grad_norm": 0.7074648346030253, "learning_rate": 7.919781859790076e-06, "loss": 0.5221, "step": 5062 }, { "epoch": 0.65, "grad_norm": 0.7985287647041702, "learning_rate": 7.918944371394953e-06, "loss": 0.5882, "step": 5063 }, { "epoch": 0.65, "grad_norm": 0.6836268680802838, "learning_rate": 7.9181067587497e-06, "loss": 0.5696, "step": 5064 }, { "epoch": 0.65, "grad_norm": 0.7989420764743823, "learning_rate": 7.917269021889965e-06, "loss": 0.5797, "step": 5065 }, { "epoch": 0.65, "grad_norm": 0.7147370856602557, "learning_rate": 7.91643116085141e-06, "loss": 0.565, "step": 5066 }, { "epoch": 0.65, "grad_norm": 0.7860893362529873, "learning_rate": 7.915593175669702e-06, "loss": 0.601, "step": 5067 }, { "epoch": 0.65, "grad_norm": 0.8811939473588392, "learning_rate": 7.914755066380508e-06, "loss": 0.6297, "step": 5068 }, { "epoch": 0.65, "grad_norm": 0.6325432752600508, "learning_rate": 7.913916833019503e-06, "loss": 0.5523, "step": 5069 }, { "epoch": 0.65, "grad_norm": 0.5665737980940468, "learning_rate": 7.913078475622373e-06, "loss": 0.4338, "step": 5070 }, { "epoch": 0.65, "grad_norm": 0.7085118896327416, "learning_rate": 7.9122399942248e-06, "loss": 0.608, "step": 5071 }, { "epoch": 0.65, "grad_norm": 0.6413836376839173, "learning_rate": 7.911401388862477e-06, "loss": 0.5476, "step": 5072 }, { "epoch": 0.65, "grad_norm": 0.7335134286632833, "learning_rate": 7.9105626595711e-06, "loss": 0.5112, "step": 5073 }, { "epoch": 0.65, "grad_norm": 0.6778552221130795, "learning_rate": 7.909723806386372e-06, "loss": 0.5642, "step": 5074 }, { "epoch": 0.65, "grad_norm": 0.6949316492523618, "learning_rate": 7.908884829343998e-06, "loss": 0.6151, "step": 5075 }, { "epoch": 0.65, "grad_norm": 0.6620510132905717, "learning_rate": 7.908045728479694e-06, "loss": 0.5053, "step": 5076 }, { "epoch": 0.65, "grad_norm": 0.6588846465849989, "learning_rate": 7.907206503829176e-06, "loss": 0.5192, "step": 5077 }, { "epoch": 0.65, "grad_norm": 0.8842365911384155, "learning_rate": 7.906367155428168e-06, "loss": 0.6151, "step": 5078 }, { "epoch": 0.65, "grad_norm": 0.628025438165786, "learning_rate": 7.905527683312395e-06, "loss": 0.524, "step": 5079 }, { "epoch": 0.65, "grad_norm": 0.6083744230761006, "learning_rate": 7.904688087517595e-06, "loss": 0.5431, "step": 5080 }, { "epoch": 0.65, "grad_norm": 0.5776827949960666, "learning_rate": 7.903848368079506e-06, "loss": 0.505, "step": 5081 }, { "epoch": 0.65, "grad_norm": 0.5946324858724029, "learning_rate": 7.903008525033868e-06, "loss": 0.4874, "step": 5082 }, { "epoch": 0.65, "grad_norm": 0.7214488254666845, "learning_rate": 7.902168558416436e-06, "loss": 0.5664, "step": 5083 }, { "epoch": 0.65, "grad_norm": 0.7315511424239522, "learning_rate": 7.901328468262963e-06, "loss": 0.5936, "step": 5084 }, { "epoch": 0.65, "grad_norm": 0.6636904846832666, "learning_rate": 7.900488254609209e-06, "loss": 0.5117, "step": 5085 }, { "epoch": 0.65, "grad_norm": 0.6531652372518333, "learning_rate": 7.899647917490938e-06, "loss": 0.5689, "step": 5086 }, { "epoch": 0.65, "grad_norm": 0.9240937186441299, "learning_rate": 7.89880745694392e-06, "loss": 0.65, "step": 5087 }, { "epoch": 0.65, "grad_norm": 0.7957616863220984, "learning_rate": 7.897966873003933e-06, "loss": 0.6273, "step": 5088 }, { "epoch": 0.65, "grad_norm": 0.7467367534511393, "learning_rate": 7.897126165706757e-06, "loss": 0.5711, "step": 5089 }, { "epoch": 0.65, "grad_norm": 1.3916064669992472, "learning_rate": 7.896285335088177e-06, "loss": 0.6094, "step": 5090 }, { "epoch": 0.65, "grad_norm": 0.6232743246329217, "learning_rate": 7.895444381183985e-06, "loss": 0.5025, "step": 5091 }, { "epoch": 0.65, "grad_norm": 0.7756969175785325, "learning_rate": 7.89460330402998e-06, "loss": 0.6654, "step": 5092 }, { "epoch": 0.65, "grad_norm": 0.7707337467045592, "learning_rate": 7.89376210366196e-06, "loss": 0.6015, "step": 5093 }, { "epoch": 0.65, "grad_norm": 0.5844229986305767, "learning_rate": 7.892920780115737e-06, "loss": 0.5971, "step": 5094 }, { "epoch": 0.65, "grad_norm": 0.7163947909411027, "learning_rate": 7.892079333427118e-06, "loss": 0.6132, "step": 5095 }, { "epoch": 0.65, "grad_norm": 0.5902806092067072, "learning_rate": 7.891237763631925e-06, "loss": 0.4839, "step": 5096 }, { "epoch": 0.65, "grad_norm": 0.68979041909195, "learning_rate": 7.89039607076598e-06, "loss": 0.6174, "step": 5097 }, { "epoch": 0.65, "grad_norm": 0.8002207584875587, "learning_rate": 7.889554254865111e-06, "loss": 0.5945, "step": 5098 }, { "epoch": 0.65, "grad_norm": 0.7854459286449994, "learning_rate": 7.888712315965149e-06, "loss": 0.6174, "step": 5099 }, { "epoch": 0.65, "grad_norm": 0.7525764975305921, "learning_rate": 7.887870254101935e-06, "loss": 0.6494, "step": 5100 }, { "epoch": 0.65, "grad_norm": 0.6643851045720129, "learning_rate": 7.887028069311313e-06, "loss": 0.5931, "step": 5101 }, { "epoch": 0.65, "grad_norm": 0.8465028869449369, "learning_rate": 7.886185761629132e-06, "loss": 0.6563, "step": 5102 }, { "epoch": 0.65, "grad_norm": 0.8076228025908405, "learning_rate": 7.885343331091247e-06, "loss": 0.6046, "step": 5103 }, { "epoch": 0.65, "grad_norm": 0.5874870140906882, "learning_rate": 7.884500777733516e-06, "loss": 0.5657, "step": 5104 }, { "epoch": 0.65, "grad_norm": 2.226635961562645, "learning_rate": 7.883658101591804e-06, "loss": 0.558, "step": 5105 }, { "epoch": 0.65, "grad_norm": 0.7058727457265803, "learning_rate": 7.882815302701982e-06, "loss": 0.5941, "step": 5106 }, { "epoch": 0.65, "grad_norm": 0.5803580049785869, "learning_rate": 7.881972381099925e-06, "loss": 0.5172, "step": 5107 }, { "epoch": 0.65, "grad_norm": 0.6040349374902021, "learning_rate": 7.881129336821512e-06, "loss": 0.5331, "step": 5108 }, { "epoch": 0.65, "grad_norm": 0.6026742740070428, "learning_rate": 7.88028616990263e-06, "loss": 0.5429, "step": 5109 }, { "epoch": 0.65, "grad_norm": 0.6927095988027014, "learning_rate": 7.879442880379172e-06, "loss": 0.6484, "step": 5110 }, { "epoch": 0.65, "grad_norm": 0.5754632919120132, "learning_rate": 7.87859946828703e-06, "loss": 0.5088, "step": 5111 }, { "epoch": 0.65, "grad_norm": 0.5707306306612392, "learning_rate": 7.877755933662108e-06, "loss": 0.4958, "step": 5112 }, { "epoch": 0.65, "grad_norm": 0.7392420938090287, "learning_rate": 7.876912276540311e-06, "loss": 0.6147, "step": 5113 }, { "epoch": 0.65, "grad_norm": 0.5479266888669612, "learning_rate": 7.876068496957552e-06, "loss": 0.516, "step": 5114 }, { "epoch": 0.65, "grad_norm": 0.758498682855062, "learning_rate": 7.875224594949748e-06, "loss": 0.664, "step": 5115 }, { "epoch": 0.65, "grad_norm": 0.6984565023936016, "learning_rate": 7.874380570552822e-06, "loss": 0.5542, "step": 5116 }, { "epoch": 0.65, "grad_norm": 0.5626514143291393, "learning_rate": 7.8735364238027e-06, "loss": 0.4988, "step": 5117 }, { "epoch": 0.65, "grad_norm": 0.8381434701455139, "learning_rate": 7.872692154735313e-06, "loss": 0.6667, "step": 5118 }, { "epoch": 0.65, "grad_norm": 0.8450080472542723, "learning_rate": 7.871847763386602e-06, "loss": 0.6469, "step": 5119 }, { "epoch": 0.65, "grad_norm": 0.6069234026750302, "learning_rate": 7.871003249792508e-06, "loss": 0.5464, "step": 5120 }, { "epoch": 0.65, "grad_norm": 0.7521649567792072, "learning_rate": 7.87015861398898e-06, "loss": 0.5728, "step": 5121 }, { "epoch": 0.65, "grad_norm": 0.8635092037279701, "learning_rate": 7.869313856011974e-06, "loss": 0.6384, "step": 5122 }, { "epoch": 0.65, "grad_norm": 0.5564774881543907, "learning_rate": 7.868468975897445e-06, "loss": 0.5411, "step": 5123 }, { "epoch": 0.65, "grad_norm": 0.5449304868682998, "learning_rate": 7.867623973681358e-06, "loss": 0.4851, "step": 5124 }, { "epoch": 0.65, "grad_norm": 0.6184281164995215, "learning_rate": 7.86677884939968e-06, "loss": 0.531, "step": 5125 }, { "epoch": 0.65, "grad_norm": 0.8489258598200148, "learning_rate": 7.865933603088389e-06, "loss": 0.6572, "step": 5126 }, { "epoch": 0.65, "grad_norm": 0.638792438542576, "learning_rate": 7.865088234783463e-06, "loss": 0.5859, "step": 5127 }, { "epoch": 0.65, "grad_norm": 0.7543736467193048, "learning_rate": 7.864242744520886e-06, "loss": 0.5805, "step": 5128 }, { "epoch": 0.65, "grad_norm": 0.7625111433611972, "learning_rate": 7.863397132336648e-06, "loss": 0.6078, "step": 5129 }, { "epoch": 0.65, "grad_norm": 0.7196372673758752, "learning_rate": 7.862551398266744e-06, "loss": 0.5841, "step": 5130 }, { "epoch": 0.65, "grad_norm": 0.8401073786095281, "learning_rate": 7.861705542347175e-06, "loss": 0.5828, "step": 5131 }, { "epoch": 0.65, "grad_norm": 0.6065509265600705, "learning_rate": 7.860859564613945e-06, "loss": 0.5115, "step": 5132 }, { "epoch": 0.65, "grad_norm": 1.0771813157619665, "learning_rate": 7.860013465103065e-06, "loss": 0.6201, "step": 5133 }, { "epoch": 0.65, "grad_norm": 0.5939155001641759, "learning_rate": 7.859167243850552e-06, "loss": 0.4979, "step": 5134 }, { "epoch": 0.65, "grad_norm": 0.7375112666833743, "learning_rate": 7.858320900892427e-06, "loss": 0.6139, "step": 5135 }, { "epoch": 0.65, "grad_norm": 0.6441646194030141, "learning_rate": 7.857474436264714e-06, "loss": 0.5202, "step": 5136 }, { "epoch": 0.65, "grad_norm": 0.6054882863749236, "learning_rate": 7.856627850003444e-06, "loss": 0.5316, "step": 5137 }, { "epoch": 0.65, "grad_norm": 0.8045266530316749, "learning_rate": 7.855781142144658e-06, "loss": 0.6269, "step": 5138 }, { "epoch": 0.65, "grad_norm": 0.9468585915100367, "learning_rate": 7.854934312724391e-06, "loss": 0.6268, "step": 5139 }, { "epoch": 0.65, "grad_norm": 0.6460564456698489, "learning_rate": 7.854087361778698e-06, "loss": 0.5551, "step": 5140 }, { "epoch": 0.65, "grad_norm": 0.7607520314008942, "learning_rate": 7.853240289343621e-06, "loss": 0.6268, "step": 5141 }, { "epoch": 0.66, "grad_norm": 0.6359834973801624, "learning_rate": 7.852393095455226e-06, "loss": 0.5635, "step": 5142 }, { "epoch": 0.66, "grad_norm": 0.9100654112663452, "learning_rate": 7.851545780149571e-06, "loss": 0.6246, "step": 5143 }, { "epoch": 0.66, "grad_norm": 0.6760599372913728, "learning_rate": 7.850698343462724e-06, "loss": 0.5719, "step": 5144 }, { "epoch": 0.66, "grad_norm": 0.7202926353213159, "learning_rate": 7.849850785430759e-06, "loss": 0.5632, "step": 5145 }, { "epoch": 0.66, "grad_norm": 0.7183561679175698, "learning_rate": 7.849003106089754e-06, "loss": 0.5408, "step": 5146 }, { "epoch": 0.66, "grad_norm": 0.6055642801620112, "learning_rate": 7.848155305475789e-06, "loss": 0.5052, "step": 5147 }, { "epoch": 0.66, "grad_norm": 0.7046992852981054, "learning_rate": 7.847307383624953e-06, "loss": 0.5302, "step": 5148 }, { "epoch": 0.66, "grad_norm": 0.7820383122734619, "learning_rate": 7.846459340573342e-06, "loss": 0.6188, "step": 5149 }, { "epoch": 0.66, "grad_norm": 0.7600596662095117, "learning_rate": 7.845611176357054e-06, "loss": 0.6185, "step": 5150 }, { "epoch": 0.66, "grad_norm": 0.6274596287492995, "learning_rate": 7.844762891012188e-06, "loss": 0.5299, "step": 5151 }, { "epoch": 0.66, "grad_norm": 0.7672853534932366, "learning_rate": 7.84391448457486e-06, "loss": 0.629, "step": 5152 }, { "epoch": 0.66, "grad_norm": 0.5741163666937225, "learning_rate": 7.843065957081178e-06, "loss": 0.5292, "step": 5153 }, { "epoch": 0.66, "grad_norm": 0.8314494622019364, "learning_rate": 7.842217308567264e-06, "loss": 0.6659, "step": 5154 }, { "epoch": 0.66, "grad_norm": 0.6011434329075702, "learning_rate": 7.841368539069242e-06, "loss": 0.5511, "step": 5155 }, { "epoch": 0.66, "grad_norm": 0.8361578191754074, "learning_rate": 7.840519648623239e-06, "loss": 0.5895, "step": 5156 }, { "epoch": 0.66, "grad_norm": 0.5632250279890848, "learning_rate": 7.839670637265394e-06, "loss": 0.4852, "step": 5157 }, { "epoch": 0.66, "grad_norm": 0.7246292297571776, "learning_rate": 7.838821505031844e-06, "loss": 0.6514, "step": 5158 }, { "epoch": 0.66, "grad_norm": 0.7391546158511483, "learning_rate": 7.837972251958732e-06, "loss": 0.6161, "step": 5159 }, { "epoch": 0.66, "grad_norm": 0.8163291036002391, "learning_rate": 7.837122878082212e-06, "loss": 0.6397, "step": 5160 }, { "epoch": 0.66, "grad_norm": 0.8075185919964452, "learning_rate": 7.836273383438438e-06, "loss": 0.6156, "step": 5161 }, { "epoch": 0.66, "grad_norm": 0.7559379472919219, "learning_rate": 7.835423768063567e-06, "loss": 0.6047, "step": 5162 }, { "epoch": 0.66, "grad_norm": 0.8136407199916558, "learning_rate": 7.834574031993768e-06, "loss": 0.6001, "step": 5163 }, { "epoch": 0.66, "grad_norm": 0.7023863094800844, "learning_rate": 7.833724175265211e-06, "loss": 0.578, "step": 5164 }, { "epoch": 0.66, "grad_norm": 0.6659585123110534, "learning_rate": 7.83287419791407e-06, "loss": 0.5229, "step": 5165 }, { "epoch": 0.66, "grad_norm": 0.6094398168476903, "learning_rate": 7.832024099976528e-06, "loss": 0.5229, "step": 5166 }, { "epoch": 0.66, "grad_norm": 0.6019145873911041, "learning_rate": 7.83117388148877e-06, "loss": 0.4999, "step": 5167 }, { "epoch": 0.66, "grad_norm": 0.8972452164358072, "learning_rate": 7.830323542486986e-06, "loss": 0.6029, "step": 5168 }, { "epoch": 0.66, "grad_norm": 0.626960742856091, "learning_rate": 7.829473083007375e-06, "loss": 0.5823, "step": 5169 }, { "epoch": 0.66, "grad_norm": 0.7269685635796725, "learning_rate": 7.828622503086136e-06, "loss": 0.6062, "step": 5170 }, { "epoch": 0.66, "grad_norm": 0.7469406272220259, "learning_rate": 7.827771802759474e-06, "loss": 0.608, "step": 5171 }, { "epoch": 0.66, "grad_norm": 0.7433024029517992, "learning_rate": 7.826920982063605e-06, "loss": 0.5773, "step": 5172 }, { "epoch": 0.66, "grad_norm": 0.7168109881004537, "learning_rate": 7.826070041034742e-06, "loss": 0.5221, "step": 5173 }, { "epoch": 0.66, "grad_norm": 0.7240518256261765, "learning_rate": 7.825218979709108e-06, "loss": 0.5505, "step": 5174 }, { "epoch": 0.66, "grad_norm": 0.5734171472591354, "learning_rate": 7.824367798122931e-06, "loss": 0.5272, "step": 5175 }, { "epoch": 0.66, "grad_norm": 0.6215298058927083, "learning_rate": 7.823516496312443e-06, "loss": 0.5087, "step": 5176 }, { "epoch": 0.66, "grad_norm": 0.7002335222577787, "learning_rate": 7.822665074313878e-06, "loss": 0.5872, "step": 5177 }, { "epoch": 0.66, "grad_norm": 0.8213398502255651, "learning_rate": 7.821813532163483e-06, "loss": 0.6456, "step": 5178 }, { "epoch": 0.66, "grad_norm": 0.8025881365393628, "learning_rate": 7.820961869897502e-06, "loss": 0.6218, "step": 5179 }, { "epoch": 0.66, "grad_norm": 0.6121564975239244, "learning_rate": 7.820110087552189e-06, "loss": 0.5248, "step": 5180 }, { "epoch": 0.66, "grad_norm": 0.7328072190869244, "learning_rate": 7.819258185163801e-06, "loss": 0.5726, "step": 5181 }, { "epoch": 0.66, "grad_norm": 0.585275707297132, "learning_rate": 7.818406162768602e-06, "loss": 0.5403, "step": 5182 }, { "epoch": 0.66, "grad_norm": 0.6375131315404875, "learning_rate": 7.817554020402859e-06, "loss": 0.5232, "step": 5183 }, { "epoch": 0.66, "grad_norm": 0.6944233175821881, "learning_rate": 7.816701758102845e-06, "loss": 0.5848, "step": 5184 }, { "epoch": 0.66, "grad_norm": 0.8608587649765883, "learning_rate": 7.815849375904838e-06, "loss": 0.6514, "step": 5185 }, { "epoch": 0.66, "grad_norm": 0.6201659343923066, "learning_rate": 7.81499687384512e-06, "loss": 0.5047, "step": 5186 }, { "epoch": 0.66, "grad_norm": 0.7537271775227715, "learning_rate": 7.814144251959981e-06, "loss": 0.5925, "step": 5187 }, { "epoch": 0.66, "grad_norm": 0.7287519655576999, "learning_rate": 7.813291510285713e-06, "loss": 0.5735, "step": 5188 }, { "epoch": 0.66, "grad_norm": 0.6470465468464696, "learning_rate": 7.812438648858617e-06, "loss": 0.4743, "step": 5189 }, { "epoch": 0.66, "grad_norm": 0.6886438333736695, "learning_rate": 7.811585667714995e-06, "loss": 0.5626, "step": 5190 }, { "epoch": 0.66, "grad_norm": 0.6946642431259499, "learning_rate": 7.810732566891154e-06, "loss": 0.6052, "step": 5191 }, { "epoch": 0.66, "grad_norm": 0.6308731610498515, "learning_rate": 7.809879346423413e-06, "loss": 0.5125, "step": 5192 }, { "epoch": 0.66, "grad_norm": 0.6106891296862297, "learning_rate": 7.809026006348084e-06, "loss": 0.5201, "step": 5193 }, { "epoch": 0.66, "grad_norm": 0.5615808931381902, "learning_rate": 7.808172546701494e-06, "loss": 0.5032, "step": 5194 }, { "epoch": 0.66, "grad_norm": 0.7863016380805363, "learning_rate": 7.807318967519974e-06, "loss": 0.6359, "step": 5195 }, { "epoch": 0.66, "grad_norm": 0.745459832326678, "learning_rate": 7.806465268839856e-06, "loss": 0.5823, "step": 5196 }, { "epoch": 0.66, "grad_norm": 0.8146838361068188, "learning_rate": 7.80561145069748e-06, "loss": 0.6344, "step": 5197 }, { "epoch": 0.66, "grad_norm": 0.727117847301932, "learning_rate": 7.80475751312919e-06, "loss": 0.5705, "step": 5198 }, { "epoch": 0.66, "grad_norm": 0.5648858992023471, "learning_rate": 7.803903456171335e-06, "loss": 0.5066, "step": 5199 }, { "epoch": 0.66, "grad_norm": 0.6611353478365266, "learning_rate": 7.803049279860269e-06, "loss": 0.5384, "step": 5200 }, { "epoch": 0.66, "grad_norm": 0.8299260810403722, "learning_rate": 7.802194984232354e-06, "loss": 0.6377, "step": 5201 }, { "epoch": 0.66, "grad_norm": 0.5871918397856358, "learning_rate": 7.801340569323951e-06, "loss": 0.4882, "step": 5202 }, { "epoch": 0.66, "grad_norm": 0.6599277193781682, "learning_rate": 7.800486035171435e-06, "loss": 0.5231, "step": 5203 }, { "epoch": 0.66, "grad_norm": 0.8046535975450259, "learning_rate": 7.799631381811176e-06, "loss": 0.5779, "step": 5204 }, { "epoch": 0.66, "grad_norm": 0.701747700038936, "learning_rate": 7.798776609279556e-06, "loss": 0.5708, "step": 5205 }, { "epoch": 0.66, "grad_norm": 0.6359479503432651, "learning_rate": 7.797921717612958e-06, "loss": 0.5053, "step": 5206 }, { "epoch": 0.66, "grad_norm": 0.6905023218957824, "learning_rate": 7.797066706847774e-06, "loss": 0.6112, "step": 5207 }, { "epoch": 0.66, "grad_norm": 0.6848635570336745, "learning_rate": 7.796211577020397e-06, "loss": 0.5701, "step": 5208 }, { "epoch": 0.66, "grad_norm": 0.7129518356470682, "learning_rate": 7.79535632816723e-06, "loss": 0.6381, "step": 5209 }, { "epoch": 0.66, "grad_norm": 0.6438700220277365, "learning_rate": 7.794500960324678e-06, "loss": 0.5188, "step": 5210 }, { "epoch": 0.66, "grad_norm": 0.7216950014145777, "learning_rate": 7.793645473529149e-06, "loss": 0.5319, "step": 5211 }, { "epoch": 0.66, "grad_norm": 0.8354405557326996, "learning_rate": 7.792789867817058e-06, "loss": 0.624, "step": 5212 }, { "epoch": 0.66, "grad_norm": 0.7324552468722789, "learning_rate": 7.791934143224829e-06, "loss": 0.6057, "step": 5213 }, { "epoch": 0.66, "grad_norm": 0.7308574461977455, "learning_rate": 7.791078299788884e-06, "loss": 0.5678, "step": 5214 }, { "epoch": 0.66, "grad_norm": 0.5744071979513701, "learning_rate": 7.790222337545654e-06, "loss": 0.5048, "step": 5215 }, { "epoch": 0.66, "grad_norm": 0.6416332515529836, "learning_rate": 7.789366256531576e-06, "loss": 0.5444, "step": 5216 }, { "epoch": 0.66, "grad_norm": 1.6933249241088075, "learning_rate": 7.788510056783092e-06, "loss": 0.6455, "step": 5217 }, { "epoch": 0.66, "grad_norm": 0.8108136984915921, "learning_rate": 7.787653738336643e-06, "loss": 0.6444, "step": 5218 }, { "epoch": 0.66, "grad_norm": 0.6048875555064881, "learning_rate": 7.786797301228684e-06, "loss": 0.5507, "step": 5219 }, { "epoch": 0.67, "grad_norm": 0.930254418294972, "learning_rate": 7.785940745495668e-06, "loss": 0.6532, "step": 5220 }, { "epoch": 0.67, "grad_norm": 0.7822429191860759, "learning_rate": 7.785084071174058e-06, "loss": 0.5916, "step": 5221 }, { "epoch": 0.67, "grad_norm": 0.9170950010327478, "learning_rate": 7.784227278300318e-06, "loss": 0.6079, "step": 5222 }, { "epoch": 0.67, "grad_norm": 0.673604555362709, "learning_rate": 7.783370366910922e-06, "loss": 0.5987, "step": 5223 }, { "epoch": 0.67, "grad_norm": 0.6986338181314327, "learning_rate": 7.78251333704234e-06, "loss": 0.6355, "step": 5224 }, { "epoch": 0.67, "grad_norm": 0.6307382224520708, "learning_rate": 7.781656188731062e-06, "loss": 0.5691, "step": 5225 }, { "epoch": 0.67, "grad_norm": 0.6829051843925056, "learning_rate": 7.780798922013565e-06, "loss": 0.5189, "step": 5226 }, { "epoch": 0.67, "grad_norm": 0.6749744785196069, "learning_rate": 7.779941536926345e-06, "loss": 0.5651, "step": 5227 }, { "epoch": 0.67, "grad_norm": 0.6058614720841491, "learning_rate": 7.779084033505899e-06, "loss": 0.522, "step": 5228 }, { "epoch": 0.67, "grad_norm": 0.8298931683925259, "learning_rate": 7.778226411788725e-06, "loss": 0.62, "step": 5229 }, { "epoch": 0.67, "grad_norm": 0.7912802282173766, "learning_rate": 7.77736867181133e-06, "loss": 0.6445, "step": 5230 }, { "epoch": 0.67, "grad_norm": 0.8074130351241093, "learning_rate": 7.776510813610225e-06, "loss": 0.6185, "step": 5231 }, { "epoch": 0.67, "grad_norm": 0.7578963500252686, "learning_rate": 7.77565283722193e-06, "loss": 0.5821, "step": 5232 }, { "epoch": 0.67, "grad_norm": 0.6218647626778033, "learning_rate": 7.774794742682961e-06, "loss": 0.5173, "step": 5233 }, { "epoch": 0.67, "grad_norm": 0.6471351305066092, "learning_rate": 7.773936530029849e-06, "loss": 0.5417, "step": 5234 }, { "epoch": 0.67, "grad_norm": 0.7390504882646253, "learning_rate": 7.77307819929912e-06, "loss": 0.6398, "step": 5235 }, { "epoch": 0.67, "grad_norm": 0.7753625720932061, "learning_rate": 7.772219750527316e-06, "loss": 0.652, "step": 5236 }, { "epoch": 0.67, "grad_norm": 0.6519807511949876, "learning_rate": 7.771361183750975e-06, "loss": 0.5764, "step": 5237 }, { "epoch": 0.67, "grad_norm": 0.7810016907333998, "learning_rate": 7.770502499006645e-06, "loss": 0.6333, "step": 5238 }, { "epoch": 0.67, "grad_norm": 0.5838930966048808, "learning_rate": 7.769643696330877e-06, "loss": 0.5447, "step": 5239 }, { "epoch": 0.67, "grad_norm": 1.0001303086954352, "learning_rate": 7.768784775760228e-06, "loss": 0.6272, "step": 5240 }, { "epoch": 0.67, "grad_norm": 0.6931588109953511, "learning_rate": 7.767925737331256e-06, "loss": 0.5593, "step": 5241 }, { "epoch": 0.67, "grad_norm": 0.5662650751063849, "learning_rate": 7.767066581080534e-06, "loss": 0.4843, "step": 5242 }, { "epoch": 0.67, "grad_norm": 0.6550126948099457, "learning_rate": 7.766207307044627e-06, "loss": 0.5182, "step": 5243 }, { "epoch": 0.67, "grad_norm": 0.750923788877791, "learning_rate": 7.765347915260115e-06, "loss": 0.6606, "step": 5244 }, { "epoch": 0.67, "grad_norm": 0.7441033530674466, "learning_rate": 7.76448840576358e-06, "loss": 0.635, "step": 5245 }, { "epoch": 0.67, "grad_norm": 0.7096215012666898, "learning_rate": 7.763628778591607e-06, "loss": 0.7049, "step": 5246 }, { "epoch": 0.67, "grad_norm": 0.7676337863445605, "learning_rate": 7.76276903378079e-06, "loss": 0.6122, "step": 5247 }, { "epoch": 0.67, "grad_norm": 0.8137413588366649, "learning_rate": 7.761909171367722e-06, "loss": 0.6652, "step": 5248 }, { "epoch": 0.67, "grad_norm": 0.6437698150916662, "learning_rate": 7.761049191389007e-06, "loss": 0.5222, "step": 5249 }, { "epoch": 0.67, "grad_norm": 0.5676579201559623, "learning_rate": 7.76018909388125e-06, "loss": 0.5295, "step": 5250 }, { "epoch": 0.67, "grad_norm": 0.5614919750815595, "learning_rate": 7.759328878881065e-06, "loss": 0.5247, "step": 5251 }, { "epoch": 0.67, "grad_norm": 0.5382690537083782, "learning_rate": 7.758468546425065e-06, "loss": 0.5207, "step": 5252 }, { "epoch": 0.67, "grad_norm": 0.6729442376850997, "learning_rate": 7.757608096549876e-06, "loss": 0.595, "step": 5253 }, { "epoch": 0.67, "grad_norm": 0.6365130074032272, "learning_rate": 7.756747529292122e-06, "loss": 0.5417, "step": 5254 }, { "epoch": 0.67, "grad_norm": 0.8748126834650067, "learning_rate": 7.755886844688434e-06, "loss": 0.6467, "step": 5255 }, { "epoch": 0.67, "grad_norm": 0.6791106755834856, "learning_rate": 7.755026042775449e-06, "loss": 0.4985, "step": 5256 }, { "epoch": 0.67, "grad_norm": 1.1160212712389737, "learning_rate": 7.75416512358981e-06, "loss": 0.6542, "step": 5257 }, { "epoch": 0.67, "grad_norm": 0.5903330924371895, "learning_rate": 7.753304087168164e-06, "loss": 0.5641, "step": 5258 }, { "epoch": 0.67, "grad_norm": 0.7014534647262111, "learning_rate": 7.75244293354716e-06, "loss": 0.5763, "step": 5259 }, { "epoch": 0.67, "grad_norm": 0.578510819540876, "learning_rate": 7.751581662763457e-06, "loss": 0.5263, "step": 5260 }, { "epoch": 0.67, "grad_norm": 0.7657974706803282, "learning_rate": 7.750720274853714e-06, "loss": 0.5801, "step": 5261 }, { "epoch": 0.67, "grad_norm": 0.705884157972561, "learning_rate": 7.749858769854599e-06, "loss": 0.5401, "step": 5262 }, { "epoch": 0.67, "grad_norm": 0.6155866238805996, "learning_rate": 7.748997147802784e-06, "loss": 0.5116, "step": 5263 }, { "epoch": 0.67, "grad_norm": 0.7409621896719885, "learning_rate": 7.748135408734944e-06, "loss": 0.5894, "step": 5264 }, { "epoch": 0.67, "grad_norm": 0.6367364129316485, "learning_rate": 7.747273552687764e-06, "loss": 0.5494, "step": 5265 }, { "epoch": 0.67, "grad_norm": 0.846871606813435, "learning_rate": 7.746411579697925e-06, "loss": 0.6348, "step": 5266 }, { "epoch": 0.67, "grad_norm": 0.788538756400709, "learning_rate": 7.745549489802122e-06, "loss": 0.5791, "step": 5267 }, { "epoch": 0.67, "grad_norm": 0.6446296980648142, "learning_rate": 7.744687283037052e-06, "loss": 0.5506, "step": 5268 }, { "epoch": 0.67, "grad_norm": 0.79133393596402, "learning_rate": 7.743824959439414e-06, "loss": 0.6569, "step": 5269 }, { "epoch": 0.67, "grad_norm": 0.9501967627153795, "learning_rate": 7.742962519045918e-06, "loss": 0.6764, "step": 5270 }, { "epoch": 0.67, "grad_norm": 0.7381205333942157, "learning_rate": 7.74209996189327e-06, "loss": 0.5695, "step": 5271 }, { "epoch": 0.67, "grad_norm": 0.6540025867844753, "learning_rate": 7.741237288018191e-06, "loss": 0.5615, "step": 5272 }, { "epoch": 0.67, "grad_norm": 0.7181243194896826, "learning_rate": 7.740374497457398e-06, "loss": 0.6131, "step": 5273 }, { "epoch": 0.67, "grad_norm": 0.6566134632869463, "learning_rate": 7.739511590247621e-06, "loss": 0.6021, "step": 5274 }, { "epoch": 0.67, "grad_norm": 0.8969440704119592, "learning_rate": 7.738648566425591e-06, "loss": 0.5757, "step": 5275 }, { "epoch": 0.67, "grad_norm": 0.7802441121795158, "learning_rate": 7.737785426028041e-06, "loss": 0.6476, "step": 5276 }, { "epoch": 0.67, "grad_norm": 0.91875618050464, "learning_rate": 7.736922169091716e-06, "loss": 0.5961, "step": 5277 }, { "epoch": 0.67, "grad_norm": 0.8098334712758746, "learning_rate": 7.736058795653359e-06, "loss": 0.6284, "step": 5278 }, { "epoch": 0.67, "grad_norm": 0.7593980031617001, "learning_rate": 7.73519530574972e-06, "loss": 0.6243, "step": 5279 }, { "epoch": 0.67, "grad_norm": 0.7599905609209325, "learning_rate": 7.73433169941756e-06, "loss": 0.5997, "step": 5280 }, { "epoch": 0.67, "grad_norm": 0.6565273382115473, "learning_rate": 7.733467976693637e-06, "loss": 0.5329, "step": 5281 }, { "epoch": 0.67, "grad_norm": 0.5459231059576544, "learning_rate": 7.732604137614718e-06, "loss": 0.5042, "step": 5282 }, { "epoch": 0.67, "grad_norm": 0.731271369521869, "learning_rate": 7.73174018221757e-06, "loss": 0.6515, "step": 5283 }, { "epoch": 0.67, "grad_norm": 0.6690327574379991, "learning_rate": 7.730876110538975e-06, "loss": 0.584, "step": 5284 }, { "epoch": 0.67, "grad_norm": 1.1085067758957585, "learning_rate": 7.73001192261571e-06, "loss": 0.7003, "step": 5285 }, { "epoch": 0.67, "grad_norm": 0.5691674781479082, "learning_rate": 7.72914761848456e-06, "loss": 0.5211, "step": 5286 }, { "epoch": 0.67, "grad_norm": 0.7834924281762433, "learning_rate": 7.728283198182318e-06, "loss": 0.6152, "step": 5287 }, { "epoch": 0.67, "grad_norm": 0.8045769745747544, "learning_rate": 7.727418661745778e-06, "loss": 0.6364, "step": 5288 }, { "epoch": 0.67, "grad_norm": 0.6718009128217338, "learning_rate": 7.726554009211741e-06, "loss": 0.591, "step": 5289 }, { "epoch": 0.67, "grad_norm": 0.7679383615974319, "learning_rate": 7.725689240617013e-06, "loss": 0.6062, "step": 5290 }, { "epoch": 0.67, "grad_norm": 0.6527731247062422, "learning_rate": 7.724824355998407e-06, "loss": 0.5649, "step": 5291 }, { "epoch": 0.67, "grad_norm": 0.6553288435708892, "learning_rate": 7.723959355392731e-06, "loss": 0.6194, "step": 5292 }, { "epoch": 0.67, "grad_norm": 0.6506231333113452, "learning_rate": 7.723094238836812e-06, "loss": 0.5562, "step": 5293 }, { "epoch": 0.67, "grad_norm": 1.151922057213763, "learning_rate": 7.722229006367472e-06, "loss": 0.6436, "step": 5294 }, { "epoch": 0.67, "grad_norm": 0.628292377199441, "learning_rate": 7.721363658021544e-06, "loss": 0.5476, "step": 5295 }, { "epoch": 0.67, "grad_norm": 0.6171340045203727, "learning_rate": 7.72049819383586e-06, "loss": 0.5805, "step": 5296 }, { "epoch": 0.67, "grad_norm": 0.7147657361764136, "learning_rate": 7.719632613847261e-06, "loss": 0.5796, "step": 5297 }, { "epoch": 0.67, "grad_norm": 0.8015877735837139, "learning_rate": 7.718766918092591e-06, "loss": 0.5992, "step": 5298 }, { "epoch": 0.68, "grad_norm": 0.7026833989226147, "learning_rate": 7.717901106608702e-06, "loss": 0.6074, "step": 5299 }, { "epoch": 0.68, "grad_norm": 0.8114330970205671, "learning_rate": 7.71703517943245e-06, "loss": 0.4924, "step": 5300 }, { "epoch": 0.68, "grad_norm": 0.7083457757063296, "learning_rate": 7.716169136600693e-06, "loss": 0.6061, "step": 5301 }, { "epoch": 0.68, "grad_norm": 0.5583808615003, "learning_rate": 7.715302978150294e-06, "loss": 0.5014, "step": 5302 }, { "epoch": 0.68, "grad_norm": 0.5871350625935311, "learning_rate": 7.714436704118125e-06, "loss": 0.5239, "step": 5303 }, { "epoch": 0.68, "grad_norm": 0.6299127194613465, "learning_rate": 7.713570314541058e-06, "loss": 0.553, "step": 5304 }, { "epoch": 0.68, "grad_norm": 0.8586505503492021, "learning_rate": 7.712703809455975e-06, "loss": 0.5921, "step": 5305 }, { "epoch": 0.68, "grad_norm": 0.6091045261816805, "learning_rate": 7.71183718889976e-06, "loss": 0.5248, "step": 5306 }, { "epoch": 0.68, "grad_norm": 0.6034704887543436, "learning_rate": 7.7109704529093e-06, "loss": 0.5276, "step": 5307 }, { "epoch": 0.68, "grad_norm": 0.7722605894684905, "learning_rate": 7.710103601521492e-06, "loss": 0.6225, "step": 5308 }, { "epoch": 0.68, "grad_norm": 0.6710929798497455, "learning_rate": 7.709236634773236e-06, "loss": 0.4731, "step": 5309 }, { "epoch": 0.68, "grad_norm": 0.7655579462306464, "learning_rate": 7.708369552701432e-06, "loss": 0.5772, "step": 5310 }, { "epoch": 0.68, "grad_norm": 0.7879824673690861, "learning_rate": 7.707502355342989e-06, "loss": 0.627, "step": 5311 }, { "epoch": 0.68, "grad_norm": 0.6507972696036635, "learning_rate": 7.706635042734824e-06, "loss": 0.5595, "step": 5312 }, { "epoch": 0.68, "grad_norm": 0.6743730344430495, "learning_rate": 7.705767614913854e-06, "loss": 0.534, "step": 5313 }, { "epoch": 0.68, "grad_norm": 0.6790101412040879, "learning_rate": 7.704900071917005e-06, "loss": 0.5905, "step": 5314 }, { "epoch": 0.68, "grad_norm": 0.8545274723722545, "learning_rate": 7.704032413781202e-06, "loss": 0.6125, "step": 5315 }, { "epoch": 0.68, "grad_norm": 0.6313573737284246, "learning_rate": 7.703164640543382e-06, "loss": 0.5129, "step": 5316 }, { "epoch": 0.68, "grad_norm": 0.6075125460045357, "learning_rate": 7.702296752240477e-06, "loss": 0.5553, "step": 5317 }, { "epoch": 0.68, "grad_norm": 0.6638290762596978, "learning_rate": 7.701428748909437e-06, "loss": 0.5404, "step": 5318 }, { "epoch": 0.68, "grad_norm": 0.6688444155868246, "learning_rate": 7.700560630587207e-06, "loss": 0.5494, "step": 5319 }, { "epoch": 0.68, "grad_norm": 0.6266998309402686, "learning_rate": 7.699692397310743e-06, "loss": 0.5443, "step": 5320 }, { "epoch": 0.68, "grad_norm": 0.6244766913497802, "learning_rate": 7.698824049117e-06, "loss": 0.5316, "step": 5321 }, { "epoch": 0.68, "grad_norm": 0.8014393174243583, "learning_rate": 7.69795558604294e-06, "loss": 0.6258, "step": 5322 }, { "epoch": 0.68, "grad_norm": 0.7211660414928572, "learning_rate": 7.697087008125532e-06, "loss": 0.6665, "step": 5323 }, { "epoch": 0.68, "grad_norm": 0.7058654858828981, "learning_rate": 7.696218315401751e-06, "loss": 0.5718, "step": 5324 }, { "epoch": 0.68, "grad_norm": 0.6178785197188702, "learning_rate": 7.69534950790857e-06, "loss": 0.5029, "step": 5325 }, { "epoch": 0.68, "grad_norm": 0.6951047346040349, "learning_rate": 7.694480585682975e-06, "loss": 0.5487, "step": 5326 }, { "epoch": 0.68, "grad_norm": 0.8982887178203129, "learning_rate": 7.69361154876195e-06, "loss": 0.6249, "step": 5327 }, { "epoch": 0.68, "grad_norm": 0.6800590187037101, "learning_rate": 7.69274239718249e-06, "loss": 0.5264, "step": 5328 }, { "epoch": 0.68, "grad_norm": 0.6003970713816201, "learning_rate": 7.691873130981591e-06, "loss": 0.5264, "step": 5329 }, { "epoch": 0.68, "grad_norm": 0.6240141298785197, "learning_rate": 7.691003750196256e-06, "loss": 0.5584, "step": 5330 }, { "epoch": 0.68, "grad_norm": 0.9179250672137081, "learning_rate": 7.690134254863488e-06, "loss": 0.6387, "step": 5331 }, { "epoch": 0.68, "grad_norm": 0.58800512162918, "learning_rate": 7.689264645020303e-06, "loss": 0.5629, "step": 5332 }, { "epoch": 0.68, "grad_norm": 0.5639766958796449, "learning_rate": 7.688394920703715e-06, "loss": 0.5339, "step": 5333 }, { "epoch": 0.68, "grad_norm": 0.8565263335822307, "learning_rate": 7.687525081950747e-06, "loss": 0.599, "step": 5334 }, { "epoch": 0.68, "grad_norm": 0.8002941713789701, "learning_rate": 7.686655128798423e-06, "loss": 0.5762, "step": 5335 }, { "epoch": 0.68, "grad_norm": 0.7013084372962467, "learning_rate": 7.685785061283776e-06, "loss": 0.5826, "step": 5336 }, { "epoch": 0.68, "grad_norm": 0.71366480717337, "learning_rate": 7.684914879443841e-06, "loss": 0.5865, "step": 5337 }, { "epoch": 0.68, "grad_norm": 0.5942434556786707, "learning_rate": 7.684044583315661e-06, "loss": 0.5539, "step": 5338 }, { "epoch": 0.68, "grad_norm": 0.8754726154923553, "learning_rate": 7.68317417293628e-06, "loss": 0.6342, "step": 5339 }, { "epoch": 0.68, "grad_norm": 0.7556984430068773, "learning_rate": 7.682303648342746e-06, "loss": 0.5984, "step": 5340 }, { "epoch": 0.68, "grad_norm": 0.6625352947072592, "learning_rate": 7.681433009572119e-06, "loss": 0.5482, "step": 5341 }, { "epoch": 0.68, "grad_norm": 0.66197751487079, "learning_rate": 7.680562256661457e-06, "loss": 0.5302, "step": 5342 }, { "epoch": 0.68, "grad_norm": 0.7109019654491981, "learning_rate": 7.679691389647825e-06, "loss": 0.5847, "step": 5343 }, { "epoch": 0.68, "grad_norm": 0.7787371126541046, "learning_rate": 7.678820408568295e-06, "loss": 0.6185, "step": 5344 }, { "epoch": 0.68, "grad_norm": 0.613323840463627, "learning_rate": 7.67794931345994e-06, "loss": 0.5122, "step": 5345 }, { "epoch": 0.68, "grad_norm": 0.6187148388381738, "learning_rate": 7.67707810435984e-06, "loss": 0.5817, "step": 5346 }, { "epoch": 0.68, "grad_norm": 0.6664703757482348, "learning_rate": 7.67620678130508e-06, "loss": 0.5617, "step": 5347 }, { "epoch": 0.68, "grad_norm": 0.8225262527102246, "learning_rate": 7.67533534433275e-06, "loss": 0.6193, "step": 5348 }, { "epoch": 0.68, "grad_norm": 0.6386208623915298, "learning_rate": 7.674463793479943e-06, "loss": 0.555, "step": 5349 }, { "epoch": 0.68, "grad_norm": 0.5982119011369568, "learning_rate": 7.673592128783759e-06, "loss": 0.5243, "step": 5350 }, { "epoch": 0.68, "grad_norm": 0.6490819455678268, "learning_rate": 7.6727203502813e-06, "loss": 0.4916, "step": 5351 }, { "epoch": 0.68, "grad_norm": 0.6128393412017639, "learning_rate": 7.67184845800968e-06, "loss": 0.5057, "step": 5352 }, { "epoch": 0.68, "grad_norm": 0.6780731281205047, "learning_rate": 7.670976452006004e-06, "loss": 0.5384, "step": 5353 }, { "epoch": 0.68, "grad_norm": 0.5690235257815663, "learning_rate": 7.6701043323074e-06, "loss": 0.4932, "step": 5354 }, { "epoch": 0.68, "grad_norm": 0.7420962431359314, "learning_rate": 7.669232098950985e-06, "loss": 0.6022, "step": 5355 }, { "epoch": 0.68, "grad_norm": 0.635443809239543, "learning_rate": 7.66835975197389e-06, "loss": 0.5794, "step": 5356 }, { "epoch": 0.68, "grad_norm": 0.7707091924932612, "learning_rate": 7.667487291413247e-06, "loss": 0.616, "step": 5357 }, { "epoch": 0.68, "grad_norm": 0.5705184601449362, "learning_rate": 7.666614717306193e-06, "loss": 0.5002, "step": 5358 }, { "epoch": 0.68, "grad_norm": 1.1716942186392239, "learning_rate": 7.665742029689874e-06, "loss": 0.6027, "step": 5359 }, { "epoch": 0.68, "grad_norm": 0.7184567838817191, "learning_rate": 7.664869228601433e-06, "loss": 0.5835, "step": 5360 }, { "epoch": 0.68, "grad_norm": 0.7024049582923059, "learning_rate": 7.663996314078025e-06, "loss": 0.5724, "step": 5361 }, { "epoch": 0.68, "grad_norm": 0.539286470647893, "learning_rate": 7.663123286156809e-06, "loss": 0.4751, "step": 5362 }, { "epoch": 0.68, "grad_norm": 0.6091919738277533, "learning_rate": 7.662250144874943e-06, "loss": 0.5182, "step": 5363 }, { "epoch": 0.68, "grad_norm": 0.582008114127871, "learning_rate": 7.661376890269595e-06, "loss": 0.5199, "step": 5364 }, { "epoch": 0.68, "grad_norm": 0.5617807601456263, "learning_rate": 7.660503522377938e-06, "loss": 0.5081, "step": 5365 }, { "epoch": 0.68, "grad_norm": 0.6430367415995056, "learning_rate": 7.659630041237148e-06, "loss": 0.5466, "step": 5366 }, { "epoch": 0.68, "grad_norm": 0.6385091164378073, "learning_rate": 7.658756446884405e-06, "loss": 0.5704, "step": 5367 }, { "epoch": 0.68, "grad_norm": 0.682410731978004, "learning_rate": 7.657882739356898e-06, "loss": 0.5996, "step": 5368 }, { "epoch": 0.68, "grad_norm": 0.7599238778128445, "learning_rate": 7.657008918691816e-06, "loss": 0.6487, "step": 5369 }, { "epoch": 0.68, "grad_norm": 0.8529496451834723, "learning_rate": 7.656134984926354e-06, "loss": 0.6324, "step": 5370 }, { "epoch": 0.68, "grad_norm": 0.5668754506049037, "learning_rate": 7.655260938097715e-06, "loss": 0.5344, "step": 5371 }, { "epoch": 0.68, "grad_norm": 0.6120986039999534, "learning_rate": 7.654386778243102e-06, "loss": 0.5122, "step": 5372 }, { "epoch": 0.68, "grad_norm": 0.5789339225907278, "learning_rate": 7.653512505399725e-06, "loss": 0.5343, "step": 5373 }, { "epoch": 0.68, "grad_norm": 0.6454722610257745, "learning_rate": 7.6526381196048e-06, "loss": 0.5546, "step": 5374 }, { "epoch": 0.68, "grad_norm": 0.7044088838158508, "learning_rate": 7.65176362089555e-06, "loss": 0.6006, "step": 5375 }, { "epoch": 0.68, "grad_norm": 0.581249987029883, "learning_rate": 7.650889009309192e-06, "loss": 0.512, "step": 5376 }, { "epoch": 0.69, "grad_norm": 0.8576964304073412, "learning_rate": 7.650014284882963e-06, "loss": 0.6489, "step": 5377 }, { "epoch": 0.69, "grad_norm": 0.545865383601175, "learning_rate": 7.649139447654093e-06, "loss": 0.4896, "step": 5378 }, { "epoch": 0.69, "grad_norm": 0.6026253881184362, "learning_rate": 7.648264497659823e-06, "loss": 0.5201, "step": 5379 }, { "epoch": 0.69, "grad_norm": 0.810654222136975, "learning_rate": 7.647389434937396e-06, "loss": 0.5675, "step": 5380 }, { "epoch": 0.69, "grad_norm": 0.6133869022233376, "learning_rate": 7.64651425952406e-06, "loss": 0.5084, "step": 5381 }, { "epoch": 0.69, "grad_norm": 0.8385442460914531, "learning_rate": 7.645638971457068e-06, "loss": 0.5815, "step": 5382 }, { "epoch": 0.69, "grad_norm": 0.5621602526736209, "learning_rate": 7.64476357077368e-06, "loss": 0.4776, "step": 5383 }, { "epoch": 0.69, "grad_norm": 0.6911636496925291, "learning_rate": 7.643888057511159e-06, "loss": 0.5934, "step": 5384 }, { "epoch": 0.69, "grad_norm": 0.6843306488745862, "learning_rate": 7.643012431706771e-06, "loss": 0.5266, "step": 5385 }, { "epoch": 0.69, "grad_norm": 0.7198955846421942, "learning_rate": 7.64213669339779e-06, "loss": 0.5934, "step": 5386 }, { "epoch": 0.69, "grad_norm": 0.6491076186574289, "learning_rate": 7.641260842621494e-06, "loss": 0.5063, "step": 5387 }, { "epoch": 0.69, "grad_norm": 0.6641918581037313, "learning_rate": 7.640384879415164e-06, "loss": 0.5459, "step": 5388 }, { "epoch": 0.69, "grad_norm": 0.7564332359919856, "learning_rate": 7.639508803816087e-06, "loss": 0.6161, "step": 5389 }, { "epoch": 0.69, "grad_norm": 0.5403020534820326, "learning_rate": 7.638632615861555e-06, "loss": 0.4545, "step": 5390 }, { "epoch": 0.69, "grad_norm": 0.7666645707024677, "learning_rate": 7.637756315588862e-06, "loss": 0.6201, "step": 5391 }, { "epoch": 0.69, "grad_norm": 0.835280670452697, "learning_rate": 7.636879903035314e-06, "loss": 0.617, "step": 5392 }, { "epoch": 0.69, "grad_norm": 0.6937853179061539, "learning_rate": 7.636003378238215e-06, "loss": 0.5016, "step": 5393 }, { "epoch": 0.69, "grad_norm": 0.6569690233668273, "learning_rate": 7.635126741234873e-06, "loss": 0.5547, "step": 5394 }, { "epoch": 0.69, "grad_norm": 0.7645363855007361, "learning_rate": 7.63424999206261e-06, "loss": 0.6731, "step": 5395 }, { "epoch": 0.69, "grad_norm": 0.5415710308418598, "learning_rate": 7.633373130758741e-06, "loss": 0.5246, "step": 5396 }, { "epoch": 0.69, "grad_norm": 0.7086320400014732, "learning_rate": 7.632496157360591e-06, "loss": 0.5988, "step": 5397 }, { "epoch": 0.69, "grad_norm": 0.5935657497173328, "learning_rate": 7.631619071905494e-06, "loss": 0.5018, "step": 5398 }, { "epoch": 0.69, "grad_norm": 0.5834521141203658, "learning_rate": 7.630741874430782e-06, "loss": 0.5276, "step": 5399 }, { "epoch": 0.69, "grad_norm": 0.5481030974166777, "learning_rate": 7.629864564973798e-06, "loss": 0.4916, "step": 5400 }, { "epoch": 0.69, "grad_norm": 0.8143837782426001, "learning_rate": 7.62898714357188e-06, "loss": 0.6332, "step": 5401 }, { "epoch": 0.69, "grad_norm": 1.2293795422243798, "learning_rate": 7.62810961026238e-06, "loss": 0.6667, "step": 5402 }, { "epoch": 0.69, "grad_norm": 0.7048513179537687, "learning_rate": 7.627231965082653e-06, "loss": 0.535, "step": 5403 }, { "epoch": 0.69, "grad_norm": 0.7083127999938686, "learning_rate": 7.626354208070057e-06, "loss": 0.5071, "step": 5404 }, { "epoch": 0.69, "grad_norm": 0.5967341527148564, "learning_rate": 7.625476339261955e-06, "loss": 0.5027, "step": 5405 }, { "epoch": 0.69, "grad_norm": 0.6622242437396073, "learning_rate": 7.624598358695714e-06, "loss": 0.5336, "step": 5406 }, { "epoch": 0.69, "grad_norm": 0.6955140117451355, "learning_rate": 7.6237202664087095e-06, "loss": 0.5751, "step": 5407 }, { "epoch": 0.69, "grad_norm": 0.7103838535428126, "learning_rate": 7.622842062438317e-06, "loss": 0.6078, "step": 5408 }, { "epoch": 0.69, "grad_norm": 0.7475216235717348, "learning_rate": 7.621963746821919e-06, "loss": 0.6063, "step": 5409 }, { "epoch": 0.69, "grad_norm": 0.6925293170683312, "learning_rate": 7.621085319596904e-06, "loss": 0.6021, "step": 5410 }, { "epoch": 0.69, "grad_norm": 0.607102851080319, "learning_rate": 7.62020678080066e-06, "loss": 0.5005, "step": 5411 }, { "epoch": 0.69, "grad_norm": 0.6573127097147509, "learning_rate": 7.619328130470589e-06, "loss": 0.5143, "step": 5412 }, { "epoch": 0.69, "grad_norm": 0.724357688680548, "learning_rate": 7.618449368644087e-06, "loss": 0.6107, "step": 5413 }, { "epoch": 0.69, "grad_norm": 0.6472703477079979, "learning_rate": 7.617570495358565e-06, "loss": 0.5169, "step": 5414 }, { "epoch": 0.69, "grad_norm": 0.7683889088151806, "learning_rate": 7.61669151065143e-06, "loss": 0.5863, "step": 5415 }, { "epoch": 0.69, "grad_norm": 0.5961739433201265, "learning_rate": 7.615812414560102e-06, "loss": 0.5223, "step": 5416 }, { "epoch": 0.69, "grad_norm": 0.7755467138983485, "learning_rate": 7.614933207121996e-06, "loss": 0.609, "step": 5417 }, { "epoch": 0.69, "grad_norm": 0.6213787758286397, "learning_rate": 7.6140538883745394e-06, "loss": 0.5373, "step": 5418 }, { "epoch": 0.69, "grad_norm": 0.7239157114677028, "learning_rate": 7.613174458355164e-06, "loss": 0.6376, "step": 5419 }, { "epoch": 0.69, "grad_norm": 0.8068751355867426, "learning_rate": 7.612294917101302e-06, "loss": 0.6776, "step": 5420 }, { "epoch": 0.69, "grad_norm": 0.7230239352917054, "learning_rate": 7.611415264650392e-06, "loss": 0.643, "step": 5421 }, { "epoch": 0.69, "grad_norm": 0.6644945599757978, "learning_rate": 7.610535501039878e-06, "loss": 0.6106, "step": 5422 }, { "epoch": 0.69, "grad_norm": 0.5683173879743686, "learning_rate": 7.609655626307211e-06, "loss": 0.5046, "step": 5423 }, { "epoch": 0.69, "grad_norm": 0.7109541936676621, "learning_rate": 7.608775640489844e-06, "loss": 0.6107, "step": 5424 }, { "epoch": 0.69, "grad_norm": 0.6107410779022118, "learning_rate": 7.607895543625233e-06, "loss": 0.5724, "step": 5425 }, { "epoch": 0.69, "grad_norm": 0.7945096443773711, "learning_rate": 7.6070153357508435e-06, "loss": 0.6273, "step": 5426 }, { "epoch": 0.69, "grad_norm": 0.7070430298179371, "learning_rate": 7.6061350169041415e-06, "loss": 0.5953, "step": 5427 }, { "epoch": 0.69, "grad_norm": 0.7733949426303575, "learning_rate": 7.605254587122599e-06, "loss": 0.5823, "step": 5428 }, { "epoch": 0.69, "grad_norm": 0.7809194339482484, "learning_rate": 7.604374046443697e-06, "loss": 0.5006, "step": 5429 }, { "epoch": 0.69, "grad_norm": 0.6251040077474513, "learning_rate": 7.60349339490491e-06, "loss": 0.5755, "step": 5430 }, { "epoch": 0.69, "grad_norm": 0.795837385220522, "learning_rate": 7.602612632543732e-06, "loss": 0.5978, "step": 5431 }, { "epoch": 0.69, "grad_norm": 0.576667226610342, "learning_rate": 7.60173175939765e-06, "loss": 0.515, "step": 5432 }, { "epoch": 0.69, "grad_norm": 0.6468163008548756, "learning_rate": 7.60085077550416e-06, "loss": 0.5676, "step": 5433 }, { "epoch": 0.69, "grad_norm": 0.6780546892541153, "learning_rate": 7.599969680900765e-06, "loss": 0.5994, "step": 5434 }, { "epoch": 0.69, "grad_norm": 0.7657552987026378, "learning_rate": 7.5990884756249685e-06, "loss": 0.613, "step": 5435 }, { "epoch": 0.69, "grad_norm": 1.0754882894038966, "learning_rate": 7.5982071597142815e-06, "loss": 0.6183, "step": 5436 }, { "epoch": 0.69, "grad_norm": 0.5796398281401157, "learning_rate": 7.597325733206218e-06, "loss": 0.543, "step": 5437 }, { "epoch": 0.69, "grad_norm": 0.7832954875863424, "learning_rate": 7.596444196138297e-06, "loss": 0.6057, "step": 5438 }, { "epoch": 0.69, "grad_norm": 0.7701490375794444, "learning_rate": 7.595562548548045e-06, "loss": 0.6196, "step": 5439 }, { "epoch": 0.69, "grad_norm": 0.7453484478446744, "learning_rate": 7.5946807904729894e-06, "loss": 0.5259, "step": 5440 }, { "epoch": 0.69, "grad_norm": 0.5822669148490245, "learning_rate": 7.593798921950662e-06, "loss": 0.5312, "step": 5441 }, { "epoch": 0.69, "grad_norm": 0.7201675708314541, "learning_rate": 7.592916943018604e-06, "loss": 0.6383, "step": 5442 }, { "epoch": 0.69, "grad_norm": 0.7129321961162887, "learning_rate": 7.592034853714359e-06, "loss": 0.6, "step": 5443 }, { "epoch": 0.69, "grad_norm": 0.6956984087821456, "learning_rate": 7.591152654075473e-06, "loss": 0.6116, "step": 5444 }, { "epoch": 0.69, "grad_norm": 0.6734346629490349, "learning_rate": 7.590270344139499e-06, "loss": 0.5618, "step": 5445 }, { "epoch": 0.69, "grad_norm": 0.6328741817031723, "learning_rate": 7.589387923943992e-06, "loss": 0.5009, "step": 5446 }, { "epoch": 0.69, "grad_norm": 1.301681239723456, "learning_rate": 7.5885053935265155e-06, "loss": 0.6442, "step": 5447 }, { "epoch": 0.69, "grad_norm": 0.8176790790323062, "learning_rate": 7.587622752924637e-06, "loss": 0.6091, "step": 5448 }, { "epoch": 0.69, "grad_norm": 0.7297976101130947, "learning_rate": 7.586740002175928e-06, "loss": 0.5673, "step": 5449 }, { "epoch": 0.69, "grad_norm": 0.6506549382261865, "learning_rate": 7.585857141317962e-06, "loss": 0.5933, "step": 5450 }, { "epoch": 0.69, "grad_norm": 0.6608629796794794, "learning_rate": 7.584974170388319e-06, "loss": 0.5372, "step": 5451 }, { "epoch": 0.69, "grad_norm": 0.6446043855865727, "learning_rate": 7.584091089424589e-06, "loss": 0.5698, "step": 5452 }, { "epoch": 0.69, "grad_norm": 0.6616059324009207, "learning_rate": 7.583207898464356e-06, "loss": 0.5118, "step": 5453 }, { "epoch": 0.69, "grad_norm": 0.6768717887359736, "learning_rate": 7.582324597545219e-06, "loss": 0.5281, "step": 5454 }, { "epoch": 0.69, "grad_norm": 0.7068338364357585, "learning_rate": 7.581441186704776e-06, "loss": 0.5516, "step": 5455 }, { "epoch": 0.7, "grad_norm": 0.5690980398220856, "learning_rate": 7.580557665980631e-06, "loss": 0.5194, "step": 5456 }, { "epoch": 0.7, "grad_norm": 0.625019399391991, "learning_rate": 7.579674035410391e-06, "loss": 0.4861, "step": 5457 }, { "epoch": 0.7, "grad_norm": 0.6799176783698669, "learning_rate": 7.5787902950316725e-06, "loss": 0.5666, "step": 5458 }, { "epoch": 0.7, "grad_norm": 0.708875205693146, "learning_rate": 7.577906444882091e-06, "loss": 0.6049, "step": 5459 }, { "epoch": 0.7, "grad_norm": 0.5581560808860451, "learning_rate": 7.5770224849992705e-06, "loss": 0.5692, "step": 5460 }, { "epoch": 0.7, "grad_norm": 1.349474955178804, "learning_rate": 7.576138415420838e-06, "loss": 0.6313, "step": 5461 }, { "epoch": 0.7, "grad_norm": 0.6496242085299923, "learning_rate": 7.575254236184424e-06, "loss": 0.5417, "step": 5462 }, { "epoch": 0.7, "grad_norm": 0.7364391519847628, "learning_rate": 7.5743699473276664e-06, "loss": 0.6295, "step": 5463 }, { "epoch": 0.7, "grad_norm": 0.9249468560003917, "learning_rate": 7.573485548888209e-06, "loss": 0.6309, "step": 5464 }, { "epoch": 0.7, "grad_norm": 0.7308284489970758, "learning_rate": 7.572601040903693e-06, "loss": 0.5864, "step": 5465 }, { "epoch": 0.7, "grad_norm": 0.6646316896812953, "learning_rate": 7.571716423411772e-06, "loss": 0.5318, "step": 5466 }, { "epoch": 0.7, "grad_norm": 0.7871039854942252, "learning_rate": 7.570831696450101e-06, "loss": 0.6606, "step": 5467 }, { "epoch": 0.7, "grad_norm": 0.5777473358378997, "learning_rate": 7.569946860056341e-06, "loss": 0.5046, "step": 5468 }, { "epoch": 0.7, "grad_norm": 0.6865903037439381, "learning_rate": 7.569061914268154e-06, "loss": 0.5421, "step": 5469 }, { "epoch": 0.7, "grad_norm": 0.6264948735094058, "learning_rate": 7.568176859123212e-06, "loss": 0.5668, "step": 5470 }, { "epoch": 0.7, "grad_norm": 0.8757852540791926, "learning_rate": 7.5672916946591866e-06, "loss": 0.6485, "step": 5471 }, { "epoch": 0.7, "grad_norm": 0.7127381619652646, "learning_rate": 7.566406420913759e-06, "loss": 0.6035, "step": 5472 }, { "epoch": 0.7, "grad_norm": 0.7086887400407788, "learning_rate": 7.565521037924612e-06, "loss": 0.5203, "step": 5473 }, { "epoch": 0.7, "grad_norm": 0.7432649571650681, "learning_rate": 7.564635545729431e-06, "loss": 0.59, "step": 5474 }, { "epoch": 0.7, "grad_norm": 0.632263530343266, "learning_rate": 7.563749944365909e-06, "loss": 0.5628, "step": 5475 }, { "epoch": 0.7, "grad_norm": 0.7524947352704826, "learning_rate": 7.562864233871746e-06, "loss": 0.534, "step": 5476 }, { "epoch": 0.7, "grad_norm": 0.5442806979374378, "learning_rate": 7.561978414284643e-06, "loss": 0.5033, "step": 5477 }, { "epoch": 0.7, "grad_norm": 0.731510330305729, "learning_rate": 7.561092485642305e-06, "loss": 0.6024, "step": 5478 }, { "epoch": 0.7, "grad_norm": 0.6656624046368786, "learning_rate": 7.560206447982443e-06, "loss": 0.5929, "step": 5479 }, { "epoch": 0.7, "grad_norm": 0.7745102302614559, "learning_rate": 7.559320301342775e-06, "loss": 0.5565, "step": 5480 }, { "epoch": 0.7, "grad_norm": 0.6797113051754411, "learning_rate": 7.55843404576102e-06, "loss": 0.5854, "step": 5481 }, { "epoch": 0.7, "grad_norm": 0.7466460173279136, "learning_rate": 7.557547681274903e-06, "loss": 0.6156, "step": 5482 }, { "epoch": 0.7, "grad_norm": 0.7288466337400398, "learning_rate": 7.556661207922156e-06, "loss": 0.5199, "step": 5483 }, { "epoch": 0.7, "grad_norm": 1.0119325422624517, "learning_rate": 7.555774625740509e-06, "loss": 0.649, "step": 5484 }, { "epoch": 0.7, "grad_norm": 0.7638263102320347, "learning_rate": 7.554887934767703e-06, "loss": 0.6565, "step": 5485 }, { "epoch": 0.7, "grad_norm": 0.7065218600244592, "learning_rate": 7.554001135041482e-06, "loss": 0.5312, "step": 5486 }, { "epoch": 0.7, "grad_norm": 0.6434874856143814, "learning_rate": 7.553114226599595e-06, "loss": 0.5571, "step": 5487 }, { "epoch": 0.7, "grad_norm": 0.6323723664848917, "learning_rate": 7.552227209479794e-06, "loss": 0.5305, "step": 5488 }, { "epoch": 0.7, "grad_norm": 0.7267461648954927, "learning_rate": 7.5513400837198355e-06, "loss": 0.5722, "step": 5489 }, { "epoch": 0.7, "grad_norm": 0.6671882548083844, "learning_rate": 7.550452849357484e-06, "loss": 0.6023, "step": 5490 }, { "epoch": 0.7, "grad_norm": 0.6674776173780151, "learning_rate": 7.5495655064305025e-06, "loss": 0.5253, "step": 5491 }, { "epoch": 0.7, "grad_norm": 0.746430621867441, "learning_rate": 7.548678054976666e-06, "loss": 0.6356, "step": 5492 }, { "epoch": 0.7, "grad_norm": 0.6746456754429112, "learning_rate": 7.54779049503375e-06, "loss": 0.5319, "step": 5493 }, { "epoch": 0.7, "grad_norm": 0.5909693788870325, "learning_rate": 7.546902826639533e-06, "loss": 0.5263, "step": 5494 }, { "epoch": 0.7, "grad_norm": 0.6822441732105623, "learning_rate": 7.546015049831802e-06, "loss": 0.5531, "step": 5495 }, { "epoch": 0.7, "grad_norm": 0.8611769642948816, "learning_rate": 7.545127164648347e-06, "loss": 0.6259, "step": 5496 }, { "epoch": 0.7, "grad_norm": 0.6240254873315566, "learning_rate": 7.544239171126959e-06, "loss": 0.5498, "step": 5497 }, { "epoch": 0.7, "grad_norm": 0.6247896324463584, "learning_rate": 7.5433510693054425e-06, "loss": 0.56, "step": 5498 }, { "epoch": 0.7, "grad_norm": 0.8005416297048783, "learning_rate": 7.542462859221597e-06, "loss": 0.6389, "step": 5499 }, { "epoch": 0.7, "grad_norm": 0.5355148967485481, "learning_rate": 7.541574540913232e-06, "loss": 0.52, "step": 5500 }, { "epoch": 0.7, "grad_norm": 0.7439451661989224, "learning_rate": 7.5406861144181606e-06, "loss": 0.5985, "step": 5501 }, { "epoch": 0.7, "grad_norm": 0.9114322341961698, "learning_rate": 7.539797579774201e-06, "loss": 0.6614, "step": 5502 }, { "epoch": 0.7, "grad_norm": 0.7011849852364572, "learning_rate": 7.538908937019174e-06, "loss": 0.6, "step": 5503 }, { "epoch": 0.7, "grad_norm": 0.5877618936349194, "learning_rate": 7.538020186190907e-06, "loss": 0.5305, "step": 5504 }, { "epoch": 0.7, "grad_norm": 0.7128670619590266, "learning_rate": 7.537131327327229e-06, "loss": 0.5942, "step": 5505 }, { "epoch": 0.7, "grad_norm": 0.6583241096129865, "learning_rate": 7.536242360465979e-06, "loss": 0.5493, "step": 5506 }, { "epoch": 0.7, "grad_norm": 0.7975469748019888, "learning_rate": 7.535353285644998e-06, "loss": 0.627, "step": 5507 }, { "epoch": 0.7, "grad_norm": 0.7146920127920903, "learning_rate": 7.534464102902129e-06, "loss": 0.6266, "step": 5508 }, { "epoch": 0.7, "grad_norm": 0.7334534430237059, "learning_rate": 7.53357481227522e-06, "loss": 0.5772, "step": 5509 }, { "epoch": 0.7, "grad_norm": 0.9073657062311751, "learning_rate": 7.53268541380213e-06, "loss": 0.6741, "step": 5510 }, { "epoch": 0.7, "grad_norm": 0.6777630299392265, "learning_rate": 7.531795907520714e-06, "loss": 0.5558, "step": 5511 }, { "epoch": 0.7, "grad_norm": 0.6457095570061134, "learning_rate": 7.530906293468837e-06, "loss": 0.5385, "step": 5512 }, { "epoch": 0.7, "grad_norm": 0.6152226963370656, "learning_rate": 7.530016571684366e-06, "loss": 0.5043, "step": 5513 }, { "epoch": 0.7, "grad_norm": 0.642558496011311, "learning_rate": 7.529126742205176e-06, "loss": 0.5459, "step": 5514 }, { "epoch": 0.7, "grad_norm": 0.5746935789627408, "learning_rate": 7.5282368050691405e-06, "loss": 0.555, "step": 5515 }, { "epoch": 0.7, "grad_norm": 0.6245333402941172, "learning_rate": 7.527346760314145e-06, "loss": 0.5692, "step": 5516 }, { "epoch": 0.7, "grad_norm": 0.6533041992053598, "learning_rate": 7.526456607978072e-06, "loss": 0.5658, "step": 5517 }, { "epoch": 0.7, "grad_norm": 0.7423793547101148, "learning_rate": 7.5255663480988165e-06, "loss": 0.5703, "step": 5518 }, { "epoch": 0.7, "grad_norm": 0.5864841659833234, "learning_rate": 7.524675980714272e-06, "loss": 0.5449, "step": 5519 }, { "epoch": 0.7, "grad_norm": 0.6568467936774076, "learning_rate": 7.523785505862339e-06, "loss": 0.4968, "step": 5520 }, { "epoch": 0.7, "grad_norm": 0.5663009235343691, "learning_rate": 7.5228949235809205e-06, "loss": 0.4981, "step": 5521 }, { "epoch": 0.7, "grad_norm": 0.5477631262774487, "learning_rate": 7.522004233907927e-06, "loss": 0.5194, "step": 5522 }, { "epoch": 0.7, "grad_norm": 0.767181909781016, "learning_rate": 7.521113436881273e-06, "loss": 0.5831, "step": 5523 }, { "epoch": 0.7, "grad_norm": 0.93607736525853, "learning_rate": 7.520222532538877e-06, "loss": 0.5758, "step": 5524 }, { "epoch": 0.7, "grad_norm": 0.7008607764510454, "learning_rate": 7.519331520918662e-06, "loss": 0.5337, "step": 5525 }, { "epoch": 0.7, "grad_norm": 0.7891733069469153, "learning_rate": 7.518440402058553e-06, "loss": 0.641, "step": 5526 }, { "epoch": 0.7, "grad_norm": 0.8685535532634578, "learning_rate": 7.517549175996485e-06, "loss": 0.6258, "step": 5527 }, { "epoch": 0.7, "grad_norm": 0.755134567623352, "learning_rate": 7.5166578427703914e-06, "loss": 0.637, "step": 5528 }, { "epoch": 0.7, "grad_norm": 0.7420557935642936, "learning_rate": 7.5157664024182174e-06, "loss": 0.5485, "step": 5529 }, { "epoch": 0.7, "grad_norm": 0.59662029397398, "learning_rate": 7.5148748549779075e-06, "loss": 0.5874, "step": 5530 }, { "epoch": 0.7, "grad_norm": 0.693248989669816, "learning_rate": 7.51398320048741e-06, "loss": 0.556, "step": 5531 }, { "epoch": 0.7, "grad_norm": 0.6317669816606988, "learning_rate": 7.513091438984682e-06, "loss": 0.5348, "step": 5532 }, { "epoch": 0.7, "grad_norm": 0.7504080633376384, "learning_rate": 7.5121995705076825e-06, "loss": 0.5211, "step": 5533 }, { "epoch": 0.71, "grad_norm": 0.5733965546187132, "learning_rate": 7.511307595094375e-06, "loss": 0.4929, "step": 5534 }, { "epoch": 0.71, "grad_norm": 0.7869138745795831, "learning_rate": 7.5104155127827275e-06, "loss": 0.5317, "step": 5535 }, { "epoch": 0.71, "grad_norm": 0.7438322500729675, "learning_rate": 7.509523323610713e-06, "loss": 0.6496, "step": 5536 }, { "epoch": 0.71, "grad_norm": 0.577448630036473, "learning_rate": 7.508631027616311e-06, "loss": 0.5282, "step": 5537 }, { "epoch": 0.71, "grad_norm": 0.6927512837277563, "learning_rate": 7.507738624837502e-06, "loss": 0.6542, "step": 5538 }, { "epoch": 0.71, "grad_norm": 0.7953838617113616, "learning_rate": 7.506846115312274e-06, "loss": 0.634, "step": 5539 }, { "epoch": 0.71, "grad_norm": 0.6284276738644262, "learning_rate": 7.505953499078618e-06, "loss": 0.5649, "step": 5540 }, { "epoch": 0.71, "grad_norm": 0.7195176283967624, "learning_rate": 7.505060776174529e-06, "loss": 0.6298, "step": 5541 }, { "epoch": 0.71, "grad_norm": 0.7861746368351229, "learning_rate": 7.504167946638007e-06, "loss": 0.5821, "step": 5542 }, { "epoch": 0.71, "grad_norm": 0.7098625392169347, "learning_rate": 7.503275010507058e-06, "loss": 0.5195, "step": 5543 }, { "epoch": 0.71, "grad_norm": 0.7370578608999878, "learning_rate": 7.502381967819692e-06, "loss": 0.6558, "step": 5544 }, { "epoch": 0.71, "grad_norm": 0.6854933639347381, "learning_rate": 7.501488818613921e-06, "loss": 0.5144, "step": 5545 }, { "epoch": 0.71, "grad_norm": 0.6374023047684221, "learning_rate": 7.500595562927764e-06, "loss": 0.5638, "step": 5546 }, { "epoch": 0.71, "grad_norm": 0.7846165899240831, "learning_rate": 7.499702200799246e-06, "loss": 0.602, "step": 5547 }, { "epoch": 0.71, "grad_norm": 0.7775624447052985, "learning_rate": 7.4988087322663936e-06, "loss": 0.5535, "step": 5548 }, { "epoch": 0.71, "grad_norm": 0.86649198116926, "learning_rate": 7.497915157367237e-06, "loss": 0.6449, "step": 5549 }, { "epoch": 0.71, "grad_norm": 0.6199681158813892, "learning_rate": 7.497021476139816e-06, "loss": 0.5017, "step": 5550 }, { "epoch": 0.71, "grad_norm": 0.5509487381920122, "learning_rate": 7.496127688622169e-06, "loss": 0.5233, "step": 5551 }, { "epoch": 0.71, "grad_norm": 0.5544781377612417, "learning_rate": 7.495233794852342e-06, "loss": 0.5086, "step": 5552 }, { "epoch": 0.71, "grad_norm": 0.5923297570546956, "learning_rate": 7.494339794868388e-06, "loss": 0.5406, "step": 5553 }, { "epoch": 0.71, "grad_norm": 0.7484659934821983, "learning_rate": 7.493445688708358e-06, "loss": 0.5456, "step": 5554 }, { "epoch": 0.71, "grad_norm": 0.6575814254828445, "learning_rate": 7.492551476410314e-06, "loss": 0.5076, "step": 5555 }, { "epoch": 0.71, "grad_norm": 0.6520039793260324, "learning_rate": 7.491657158012319e-06, "loss": 0.4847, "step": 5556 }, { "epoch": 0.71, "grad_norm": 0.8301138512520254, "learning_rate": 7.4907627335524405e-06, "loss": 0.6799, "step": 5557 }, { "epoch": 0.71, "grad_norm": 0.7741470180064739, "learning_rate": 7.489868203068752e-06, "loss": 0.5918, "step": 5558 }, { "epoch": 0.71, "grad_norm": 0.7109115392808868, "learning_rate": 7.488973566599329e-06, "loss": 0.6069, "step": 5559 }, { "epoch": 0.71, "grad_norm": 0.767178787813936, "learning_rate": 7.488078824182257e-06, "loss": 0.576, "step": 5560 }, { "epoch": 0.71, "grad_norm": 0.581246501876493, "learning_rate": 7.4871839758556185e-06, "loss": 0.5452, "step": 5561 }, { "epoch": 0.71, "grad_norm": 0.8460346470153032, "learning_rate": 7.4862890216575065e-06, "loss": 0.6142, "step": 5562 }, { "epoch": 0.71, "grad_norm": 0.8801084456986683, "learning_rate": 7.4853939616260174e-06, "loss": 0.5992, "step": 5563 }, { "epoch": 0.71, "grad_norm": 0.7579932606957567, "learning_rate": 7.4844987957992485e-06, "loss": 0.6464, "step": 5564 }, { "epoch": 0.71, "grad_norm": 0.6864144690084117, "learning_rate": 7.483603524215303e-06, "loss": 0.5825, "step": 5565 }, { "epoch": 0.71, "grad_norm": 0.8509339686622452, "learning_rate": 7.482708146912295e-06, "loss": 0.6108, "step": 5566 }, { "epoch": 0.71, "grad_norm": 0.555737422673903, "learning_rate": 7.481812663928334e-06, "loss": 0.5675, "step": 5567 }, { "epoch": 0.71, "grad_norm": 0.7807117691246142, "learning_rate": 7.4809170753015395e-06, "loss": 0.6354, "step": 5568 }, { "epoch": 0.71, "grad_norm": 0.6876108956454414, "learning_rate": 7.480021381070032e-06, "loss": 0.5911, "step": 5569 }, { "epoch": 0.71, "grad_norm": 0.5522113252313763, "learning_rate": 7.479125581271939e-06, "loss": 0.4529, "step": 5570 }, { "epoch": 0.71, "grad_norm": 0.5929854641676827, "learning_rate": 7.478229675945392e-06, "loss": 0.4925, "step": 5571 }, { "epoch": 0.71, "grad_norm": 0.7239031681381652, "learning_rate": 7.477333665128526e-06, "loss": 0.6061, "step": 5572 }, { "epoch": 0.71, "grad_norm": 0.6463245893762213, "learning_rate": 7.4764375488594855e-06, "loss": 0.5464, "step": 5573 }, { "epoch": 0.71, "grad_norm": 0.7083116479145974, "learning_rate": 7.47554132717641e-06, "loss": 0.5667, "step": 5574 }, { "epoch": 0.71, "grad_norm": 0.8719949481149875, "learning_rate": 7.474645000117451e-06, "loss": 0.6517, "step": 5575 }, { "epoch": 0.71, "grad_norm": 0.5770683532298139, "learning_rate": 7.473748567720762e-06, "loss": 0.5132, "step": 5576 }, { "epoch": 0.71, "grad_norm": 0.7199319889513051, "learning_rate": 7.472852030024501e-06, "loss": 0.5643, "step": 5577 }, { "epoch": 0.71, "grad_norm": 0.6908856397956292, "learning_rate": 7.4719553870668324e-06, "loss": 0.5836, "step": 5578 }, { "epoch": 0.71, "grad_norm": 0.6841647098460729, "learning_rate": 7.4710586388859215e-06, "loss": 0.6108, "step": 5579 }, { "epoch": 0.71, "grad_norm": 0.5849911087359764, "learning_rate": 7.470161785519942e-06, "loss": 0.5901, "step": 5580 }, { "epoch": 0.71, "grad_norm": 0.6602073844857029, "learning_rate": 7.469264827007068e-06, "loss": 0.5685, "step": 5581 }, { "epoch": 0.71, "grad_norm": 0.6523554592045353, "learning_rate": 7.46836776338548e-06, "loss": 0.5861, "step": 5582 }, { "epoch": 0.71, "grad_norm": 0.6890021902397042, "learning_rate": 7.467470594693364e-06, "loss": 0.5016, "step": 5583 }, { "epoch": 0.71, "grad_norm": 0.6401967613016705, "learning_rate": 7.466573320968912e-06, "loss": 0.508, "step": 5584 }, { "epoch": 0.71, "grad_norm": 0.6570991772661504, "learning_rate": 7.465675942250314e-06, "loss": 0.5759, "step": 5585 }, { "epoch": 0.71, "grad_norm": 1.0630567084386817, "learning_rate": 7.464778458575771e-06, "loss": 0.6719, "step": 5586 }, { "epoch": 0.71, "grad_norm": 0.6374844016034613, "learning_rate": 7.4638808699834855e-06, "loss": 0.5183, "step": 5587 }, { "epoch": 0.71, "grad_norm": 0.5731688138145928, "learning_rate": 7.462983176511663e-06, "loss": 0.4934, "step": 5588 }, { "epoch": 0.71, "grad_norm": 0.9017871195801972, "learning_rate": 7.462085378198519e-06, "loss": 0.5813, "step": 5589 }, { "epoch": 0.71, "grad_norm": 0.6838869058084027, "learning_rate": 7.4611874750822675e-06, "loss": 0.5739, "step": 5590 }, { "epoch": 0.71, "grad_norm": 0.7481473094848149, "learning_rate": 7.460289467201129e-06, "loss": 0.6227, "step": 5591 }, { "epoch": 0.71, "grad_norm": 0.7552485024500268, "learning_rate": 7.459391354593333e-06, "loss": 0.5599, "step": 5592 }, { "epoch": 0.71, "grad_norm": 0.6282533925896012, "learning_rate": 7.458493137297103e-06, "loss": 0.5235, "step": 5593 }, { "epoch": 0.71, "grad_norm": 0.6166800626199566, "learning_rate": 7.457594815350678e-06, "loss": 0.5384, "step": 5594 }, { "epoch": 0.71, "grad_norm": 0.7920820713629735, "learning_rate": 7.456696388792295e-06, "loss": 0.5468, "step": 5595 }, { "epoch": 0.71, "grad_norm": 0.7346765132397483, "learning_rate": 7.455797857660196e-06, "loss": 0.536, "step": 5596 }, { "epoch": 0.71, "grad_norm": 0.6430437289013298, "learning_rate": 7.454899221992632e-06, "loss": 0.4796, "step": 5597 }, { "epoch": 0.71, "grad_norm": 0.6721675578508051, "learning_rate": 7.454000481827851e-06, "loss": 0.5873, "step": 5598 }, { "epoch": 0.71, "grad_norm": 0.6096008981029728, "learning_rate": 7.453101637204111e-06, "loss": 0.5323, "step": 5599 }, { "epoch": 0.71, "grad_norm": 0.5925943527999598, "learning_rate": 7.452202688159674e-06, "loss": 0.5041, "step": 5600 }, { "epoch": 0.71, "grad_norm": 0.5728460009147941, "learning_rate": 7.451303634732805e-06, "loss": 0.5218, "step": 5601 }, { "epoch": 0.71, "grad_norm": 0.6973336160605523, "learning_rate": 7.450404476961773e-06, "loss": 0.6362, "step": 5602 }, { "epoch": 0.71, "grad_norm": 0.7958766218236931, "learning_rate": 7.449505214884853e-06, "loss": 0.6399, "step": 5603 }, { "epoch": 0.71, "grad_norm": 0.6441371673883424, "learning_rate": 7.448605848540324e-06, "loss": 0.5445, "step": 5604 }, { "epoch": 0.71, "grad_norm": 0.5541922008080312, "learning_rate": 7.447706377966469e-06, "loss": 0.4974, "step": 5605 }, { "epoch": 0.71, "grad_norm": 0.7344791006261536, "learning_rate": 7.446806803201574e-06, "loss": 0.6002, "step": 5606 }, { "epoch": 0.71, "grad_norm": 0.7050838857584212, "learning_rate": 7.445907124283933e-06, "loss": 0.5986, "step": 5607 }, { "epoch": 0.71, "grad_norm": 0.7662848114017572, "learning_rate": 7.445007341251841e-06, "loss": 0.6198, "step": 5608 }, { "epoch": 0.71, "grad_norm": 0.9430902788951556, "learning_rate": 7.444107454143601e-06, "loss": 0.6372, "step": 5609 }, { "epoch": 0.71, "grad_norm": 0.7552672058105092, "learning_rate": 7.443207462997515e-06, "loss": 0.6099, "step": 5610 }, { "epoch": 0.71, "grad_norm": 0.6819261497740676, "learning_rate": 7.442307367851897e-06, "loss": 0.55, "step": 5611 }, { "epoch": 0.71, "grad_norm": 0.7249374279700982, "learning_rate": 7.441407168745056e-06, "loss": 0.5955, "step": 5612 }, { "epoch": 0.72, "grad_norm": 0.6992416001775748, "learning_rate": 7.440506865715316e-06, "loss": 0.5949, "step": 5613 }, { "epoch": 0.72, "grad_norm": 0.6546698374596573, "learning_rate": 7.439606458800995e-06, "loss": 0.4977, "step": 5614 }, { "epoch": 0.72, "grad_norm": 0.7400243103854021, "learning_rate": 7.438705948040426e-06, "loss": 0.641, "step": 5615 }, { "epoch": 0.72, "grad_norm": 0.5775259899719023, "learning_rate": 7.4378053334719345e-06, "loss": 0.5262, "step": 5616 }, { "epoch": 0.72, "grad_norm": 0.5750151254313061, "learning_rate": 7.436904615133862e-06, "loss": 0.4882, "step": 5617 }, { "epoch": 0.72, "grad_norm": 0.7503319692774794, "learning_rate": 7.436003793064548e-06, "loss": 0.5577, "step": 5618 }, { "epoch": 0.72, "grad_norm": 0.577851017085559, "learning_rate": 7.435102867302335e-06, "loss": 0.5084, "step": 5619 }, { "epoch": 0.72, "grad_norm": 0.7988919347178581, "learning_rate": 7.434201837885576e-06, "loss": 0.6207, "step": 5620 }, { "epoch": 0.72, "grad_norm": 0.6414598371822323, "learning_rate": 7.433300704852622e-06, "loss": 0.5151, "step": 5621 }, { "epoch": 0.72, "grad_norm": 0.6155064305103313, "learning_rate": 7.432399468241833e-06, "loss": 0.5176, "step": 5622 }, { "epoch": 0.72, "grad_norm": 0.6295500286375426, "learning_rate": 7.431498128091572e-06, "loss": 0.5537, "step": 5623 }, { "epoch": 0.72, "grad_norm": 0.8794832337445059, "learning_rate": 7.4305966844402055e-06, "loss": 0.6861, "step": 5624 }, { "epoch": 0.72, "grad_norm": 0.7617276757271866, "learning_rate": 7.429695137326105e-06, "loss": 0.642, "step": 5625 }, { "epoch": 0.72, "grad_norm": 0.597706565973292, "learning_rate": 7.428793486787648e-06, "loss": 0.5264, "step": 5626 }, { "epoch": 0.72, "grad_norm": 0.6784040452151213, "learning_rate": 7.427891732863212e-06, "loss": 0.5605, "step": 5627 }, { "epoch": 0.72, "grad_norm": 0.6174673738110383, "learning_rate": 7.426989875591183e-06, "loss": 0.5353, "step": 5628 }, { "epoch": 0.72, "grad_norm": 0.6309831888077089, "learning_rate": 7.426087915009952e-06, "loss": 0.5295, "step": 5629 }, { "epoch": 0.72, "grad_norm": 0.5773689344234675, "learning_rate": 7.42518585115791e-06, "loss": 0.5117, "step": 5630 }, { "epoch": 0.72, "grad_norm": 0.6580223975721441, "learning_rate": 7.424283684073456e-06, "loss": 0.5813, "step": 5631 }, { "epoch": 0.72, "grad_norm": 0.7694103118508562, "learning_rate": 7.423381413794995e-06, "loss": 0.6048, "step": 5632 }, { "epoch": 0.72, "grad_norm": 0.7798427890138921, "learning_rate": 7.422479040360928e-06, "loss": 0.593, "step": 5633 }, { "epoch": 0.72, "grad_norm": 0.5912828207632826, "learning_rate": 7.42157656380967e-06, "loss": 0.476, "step": 5634 }, { "epoch": 0.72, "grad_norm": 0.6336847705950144, "learning_rate": 7.4206739841796375e-06, "loss": 0.4985, "step": 5635 }, { "epoch": 0.72, "grad_norm": 0.8028719077312637, "learning_rate": 7.419771301509248e-06, "loss": 0.632, "step": 5636 }, { "epoch": 0.72, "grad_norm": 0.667322966161425, "learning_rate": 7.418868515836927e-06, "loss": 0.5008, "step": 5637 }, { "epoch": 0.72, "grad_norm": 0.5994255309463825, "learning_rate": 7.4179656272011025e-06, "loss": 0.5302, "step": 5638 }, { "epoch": 0.72, "grad_norm": 0.7060310675464113, "learning_rate": 7.417062635640207e-06, "loss": 0.5395, "step": 5639 }, { "epoch": 0.72, "grad_norm": 0.6671501879542686, "learning_rate": 7.4161595411926804e-06, "loss": 0.5724, "step": 5640 }, { "epoch": 0.72, "grad_norm": 0.6086150095679586, "learning_rate": 7.415256343896963e-06, "loss": 0.5083, "step": 5641 }, { "epoch": 0.72, "grad_norm": 0.67659508707042, "learning_rate": 7.4143530437915e-06, "loss": 0.53, "step": 5642 }, { "epoch": 0.72, "grad_norm": 0.5930453102751836, "learning_rate": 7.413449640914744e-06, "loss": 0.5051, "step": 5643 }, { "epoch": 0.72, "grad_norm": 0.6773802712835152, "learning_rate": 7.4125461353051495e-06, "loss": 0.6126, "step": 5644 }, { "epoch": 0.72, "grad_norm": 0.638229018001181, "learning_rate": 7.411642527001174e-06, "loss": 0.53, "step": 5645 }, { "epoch": 0.72, "grad_norm": 0.5765617116730168, "learning_rate": 7.410738816041283e-06, "loss": 0.5203, "step": 5646 }, { "epoch": 0.72, "grad_norm": 0.7021036191079978, "learning_rate": 7.409835002463946e-06, "loss": 0.56, "step": 5647 }, { "epoch": 0.72, "grad_norm": 0.6326871301716849, "learning_rate": 7.408931086307631e-06, "loss": 0.5378, "step": 5648 }, { "epoch": 0.72, "grad_norm": 0.6308750172678016, "learning_rate": 7.408027067610819e-06, "loss": 0.5477, "step": 5649 }, { "epoch": 0.72, "grad_norm": 0.6086451695692009, "learning_rate": 7.40712294641199e-06, "loss": 0.5381, "step": 5650 }, { "epoch": 0.72, "grad_norm": 0.6221308692310638, "learning_rate": 7.406218722749629e-06, "loss": 0.548, "step": 5651 }, { "epoch": 0.72, "grad_norm": 0.7823893248720007, "learning_rate": 7.405314396662224e-06, "loss": 0.5919, "step": 5652 }, { "epoch": 0.72, "grad_norm": 0.5910429812763102, "learning_rate": 7.404409968188274e-06, "loss": 0.4926, "step": 5653 }, { "epoch": 0.72, "grad_norm": 0.845404297963058, "learning_rate": 7.403505437366274e-06, "loss": 0.6041, "step": 5654 }, { "epoch": 0.72, "grad_norm": 0.6082515530765161, "learning_rate": 7.40260080423473e-06, "loss": 0.568, "step": 5655 }, { "epoch": 0.72, "grad_norm": 0.5486652756234054, "learning_rate": 7.401696068832145e-06, "loss": 0.4863, "step": 5656 }, { "epoch": 0.72, "grad_norm": 0.6023368850152669, "learning_rate": 7.400791231197034e-06, "loss": 0.5219, "step": 5657 }, { "epoch": 0.72, "grad_norm": 0.6377601372870552, "learning_rate": 7.399886291367913e-06, "loss": 0.5574, "step": 5658 }, { "epoch": 0.72, "grad_norm": 0.7918680740193286, "learning_rate": 7.398981249383299e-06, "loss": 0.5839, "step": 5659 }, { "epoch": 0.72, "grad_norm": 0.5790747823396108, "learning_rate": 7.398076105281722e-06, "loss": 0.5479, "step": 5660 }, { "epoch": 0.72, "grad_norm": 0.7643797573132838, "learning_rate": 7.397170859101705e-06, "loss": 0.6538, "step": 5661 }, { "epoch": 0.72, "grad_norm": 0.6298632808110909, "learning_rate": 7.396265510881788e-06, "loss": 0.5053, "step": 5662 }, { "epoch": 0.72, "grad_norm": 0.6966163507054176, "learning_rate": 7.3953600606605035e-06, "loss": 0.5453, "step": 5663 }, { "epoch": 0.72, "grad_norm": 0.6567215107829254, "learning_rate": 7.394454508476397e-06, "loss": 0.5479, "step": 5664 }, { "epoch": 0.72, "grad_norm": 0.5775030708547746, "learning_rate": 7.393548854368014e-06, "loss": 0.4355, "step": 5665 }, { "epoch": 0.72, "grad_norm": 0.7233305176706283, "learning_rate": 7.392643098373904e-06, "loss": 0.5721, "step": 5666 }, { "epoch": 0.72, "grad_norm": 1.1049064980849475, "learning_rate": 7.391737240532624e-06, "loss": 0.6325, "step": 5667 }, { "epoch": 0.72, "grad_norm": 1.2527277130532108, "learning_rate": 7.390831280882732e-06, "loss": 0.6771, "step": 5668 }, { "epoch": 0.72, "grad_norm": 0.8047751973395623, "learning_rate": 7.389925219462792e-06, "loss": 0.5585, "step": 5669 }, { "epoch": 0.72, "grad_norm": 0.8064024223119045, "learning_rate": 7.389019056311371e-06, "loss": 0.6413, "step": 5670 }, { "epoch": 0.72, "grad_norm": 0.589140462294574, "learning_rate": 7.3881127914670455e-06, "loss": 0.5318, "step": 5671 }, { "epoch": 0.72, "grad_norm": 0.6241907795546442, "learning_rate": 7.38720642496839e-06, "loss": 0.5404, "step": 5672 }, { "epoch": 0.72, "grad_norm": 0.6073188246300422, "learning_rate": 7.386299956853984e-06, "loss": 0.5196, "step": 5673 }, { "epoch": 0.72, "grad_norm": 0.5545529651076304, "learning_rate": 7.385393387162415e-06, "loss": 0.5349, "step": 5674 }, { "epoch": 0.72, "grad_norm": 0.7124743450162648, "learning_rate": 7.384486715932272e-06, "loss": 0.5394, "step": 5675 }, { "epoch": 0.72, "grad_norm": 0.5928389882833025, "learning_rate": 7.383579943202148e-06, "loss": 0.5678, "step": 5676 }, { "epoch": 0.72, "grad_norm": 0.7692055633192265, "learning_rate": 7.382673069010644e-06, "loss": 0.5883, "step": 5677 }, { "epoch": 0.72, "grad_norm": 0.7938091410107768, "learning_rate": 7.381766093396362e-06, "loss": 0.6544, "step": 5678 }, { "epoch": 0.72, "grad_norm": 0.6892771854821916, "learning_rate": 7.380859016397908e-06, "loss": 0.521, "step": 5679 }, { "epoch": 0.72, "grad_norm": 0.5692625926593775, "learning_rate": 7.379951838053894e-06, "loss": 0.5834, "step": 5680 }, { "epoch": 0.72, "grad_norm": 0.6058655119416744, "learning_rate": 7.3790445584029345e-06, "loss": 0.5688, "step": 5681 }, { "epoch": 0.72, "grad_norm": 0.5780249326111432, "learning_rate": 7.378137177483651e-06, "loss": 0.5268, "step": 5682 }, { "epoch": 0.72, "grad_norm": 0.6595996301171776, "learning_rate": 7.377229695334667e-06, "loss": 0.5472, "step": 5683 }, { "epoch": 0.72, "grad_norm": 0.8328354287000123, "learning_rate": 7.376322111994612e-06, "loss": 0.6481, "step": 5684 }, { "epoch": 0.72, "grad_norm": 0.606581976116039, "learning_rate": 7.375414427502117e-06, "loss": 0.5033, "step": 5685 }, { "epoch": 0.72, "grad_norm": 1.12974718942241, "learning_rate": 7.374506641895822e-06, "loss": 0.5692, "step": 5686 }, { "epoch": 0.72, "grad_norm": 0.8017090667085868, "learning_rate": 7.373598755214367e-06, "loss": 0.5877, "step": 5687 }, { "epoch": 0.72, "grad_norm": 0.8565991649038176, "learning_rate": 7.3726907674963975e-06, "loss": 0.6385, "step": 5688 }, { "epoch": 0.72, "grad_norm": 0.9003083774224998, "learning_rate": 7.371782678780563e-06, "loss": 0.6021, "step": 5689 }, { "epoch": 0.72, "grad_norm": 0.8611439299483281, "learning_rate": 7.370874489105521e-06, "loss": 0.6344, "step": 5690 }, { "epoch": 0.73, "grad_norm": 0.7747739841680015, "learning_rate": 7.369966198509927e-06, "loss": 0.6773, "step": 5691 }, { "epoch": 0.73, "grad_norm": 0.5755643231676336, "learning_rate": 7.369057807032446e-06, "loss": 0.5079, "step": 5692 }, { "epoch": 0.73, "grad_norm": 0.5969373227799505, "learning_rate": 7.368149314711745e-06, "loss": 0.5407, "step": 5693 }, { "epoch": 0.73, "grad_norm": 0.6443221645439203, "learning_rate": 7.367240721586493e-06, "loss": 0.609, "step": 5694 }, { "epoch": 0.73, "grad_norm": 0.8416427703897001, "learning_rate": 7.3663320276953695e-06, "loss": 0.6508, "step": 5695 }, { "epoch": 0.73, "grad_norm": 1.641847549599186, "learning_rate": 7.3654232330770535e-06, "loss": 0.6177, "step": 5696 }, { "epoch": 0.73, "grad_norm": 0.582604822902364, "learning_rate": 7.3645143377702284e-06, "loss": 0.5219, "step": 5697 }, { "epoch": 0.73, "grad_norm": 0.8213505261178368, "learning_rate": 7.363605341813585e-06, "loss": 0.5864, "step": 5698 }, { "epoch": 0.73, "grad_norm": 0.6305030534520216, "learning_rate": 7.362696245245815e-06, "loss": 0.5217, "step": 5699 }, { "epoch": 0.73, "grad_norm": 0.6773320674301949, "learning_rate": 7.3617870481056165e-06, "loss": 0.5949, "step": 5700 }, { "epoch": 0.73, "grad_norm": 0.6919005551557165, "learning_rate": 7.360877750431689e-06, "loss": 0.4858, "step": 5701 }, { "epoch": 0.73, "grad_norm": 0.5804239232357055, "learning_rate": 7.3599683522627405e-06, "loss": 0.5496, "step": 5702 }, { "epoch": 0.73, "grad_norm": 0.6610032405258903, "learning_rate": 7.3590588536374805e-06, "loss": 0.4988, "step": 5703 }, { "epoch": 0.73, "grad_norm": 0.8855582237760503, "learning_rate": 7.358149254594624e-06, "loss": 0.596, "step": 5704 }, { "epoch": 0.73, "grad_norm": 0.5636212708018711, "learning_rate": 7.35723955517289e-06, "loss": 0.4766, "step": 5705 }, { "epoch": 0.73, "grad_norm": 0.6337044899930441, "learning_rate": 7.3563297554110005e-06, "loss": 0.5057, "step": 5706 }, { "epoch": 0.73, "grad_norm": 0.6345555559804692, "learning_rate": 7.35541985534768e-06, "loss": 0.5733, "step": 5707 }, { "epoch": 0.73, "grad_norm": 0.7950576407901063, "learning_rate": 7.354509855021667e-06, "loss": 0.555, "step": 5708 }, { "epoch": 0.73, "grad_norm": 0.6605766481544945, "learning_rate": 7.353599754471692e-06, "loss": 0.5253, "step": 5709 }, { "epoch": 0.73, "grad_norm": 0.7430219107069737, "learning_rate": 7.3526895537364965e-06, "loss": 0.6322, "step": 5710 }, { "epoch": 0.73, "grad_norm": 0.8479109464219405, "learning_rate": 7.351779252854825e-06, "loss": 0.6321, "step": 5711 }, { "epoch": 0.73, "grad_norm": 0.677630186932484, "learning_rate": 7.350868851865426e-06, "loss": 0.5457, "step": 5712 }, { "epoch": 0.73, "grad_norm": 0.9251062126657649, "learning_rate": 7.349958350807052e-06, "loss": 0.5952, "step": 5713 }, { "epoch": 0.73, "grad_norm": 0.5326632761752162, "learning_rate": 7.34904774971846e-06, "loss": 0.5065, "step": 5714 }, { "epoch": 0.73, "grad_norm": 0.6292827983477869, "learning_rate": 7.348137048638412e-06, "loss": 0.492, "step": 5715 }, { "epoch": 0.73, "grad_norm": 0.5743666733056835, "learning_rate": 7.347226247605673e-06, "loss": 0.5412, "step": 5716 }, { "epoch": 0.73, "grad_norm": 0.6139657094352567, "learning_rate": 7.346315346659014e-06, "loss": 0.514, "step": 5717 }, { "epoch": 0.73, "grad_norm": 0.9149782753433823, "learning_rate": 7.345404345837209e-06, "loss": 0.5899, "step": 5718 }, { "epoch": 0.73, "grad_norm": 0.7233615211723191, "learning_rate": 7.344493245179035e-06, "loss": 0.6705, "step": 5719 }, { "epoch": 0.73, "grad_norm": 0.9639616574622281, "learning_rate": 7.343582044723276e-06, "loss": 0.5781, "step": 5720 }, { "epoch": 0.73, "grad_norm": 0.5957331894293145, "learning_rate": 7.3426707445087184e-06, "loss": 0.4817, "step": 5721 }, { "epoch": 0.73, "grad_norm": 0.7178498297909918, "learning_rate": 7.341759344574152e-06, "loss": 0.5127, "step": 5722 }, { "epoch": 0.73, "grad_norm": 0.8664327293580654, "learning_rate": 7.340847844958374e-06, "loss": 0.6246, "step": 5723 }, { "epoch": 0.73, "grad_norm": 0.8089286751783417, "learning_rate": 7.339936245700185e-06, "loss": 0.6295, "step": 5724 }, { "epoch": 0.73, "grad_norm": 0.7828796500291207, "learning_rate": 7.339024546838387e-06, "loss": 0.6233, "step": 5725 }, { "epoch": 0.73, "grad_norm": 0.7008235148702397, "learning_rate": 7.338112748411788e-06, "loss": 0.5881, "step": 5726 }, { "epoch": 0.73, "grad_norm": 0.8967780490066428, "learning_rate": 7.3372008504592004e-06, "loss": 0.6183, "step": 5727 }, { "epoch": 0.73, "grad_norm": 0.8810109259499682, "learning_rate": 7.3362888530194424e-06, "loss": 0.6314, "step": 5728 }, { "epoch": 0.73, "grad_norm": 0.6175716689203922, "learning_rate": 7.335376756131332e-06, "loss": 0.4422, "step": 5729 }, { "epoch": 0.73, "grad_norm": 0.7423435893389143, "learning_rate": 7.334464559833696e-06, "loss": 0.6334, "step": 5730 }, { "epoch": 0.73, "grad_norm": 0.6636192297928335, "learning_rate": 7.3335522641653646e-06, "loss": 0.5319, "step": 5731 }, { "epoch": 0.73, "grad_norm": 0.66616175987784, "learning_rate": 7.33263986916517e-06, "loss": 0.5524, "step": 5732 }, { "epoch": 0.73, "grad_norm": 0.6475477908293895, "learning_rate": 7.33172737487195e-06, "loss": 0.5317, "step": 5733 }, { "epoch": 0.73, "grad_norm": 0.7146391529957522, "learning_rate": 7.330814781324547e-06, "loss": 0.5766, "step": 5734 }, { "epoch": 0.73, "grad_norm": 0.7788520304420267, "learning_rate": 7.329902088561806e-06, "loss": 0.6296, "step": 5735 }, { "epoch": 0.73, "grad_norm": 0.6937233753639961, "learning_rate": 7.328989296622581e-06, "loss": 0.6616, "step": 5736 }, { "epoch": 0.73, "grad_norm": 0.8307538059154279, "learning_rate": 7.328076405545722e-06, "loss": 0.5748, "step": 5737 }, { "epoch": 0.73, "grad_norm": 0.9242323931259457, "learning_rate": 7.327163415370089e-06, "loss": 0.6153, "step": 5738 }, { "epoch": 0.73, "grad_norm": 0.9070685116030577, "learning_rate": 7.326250326134548e-06, "loss": 0.6259, "step": 5739 }, { "epoch": 0.73, "grad_norm": 0.7604768170118185, "learning_rate": 7.325337137877963e-06, "loss": 0.6053, "step": 5740 }, { "epoch": 0.73, "grad_norm": 0.7182055685307104, "learning_rate": 7.324423850639207e-06, "loss": 0.536, "step": 5741 }, { "epoch": 0.73, "grad_norm": 0.6482156691808076, "learning_rate": 7.323510464457156e-06, "loss": 0.4842, "step": 5742 }, { "epoch": 0.73, "grad_norm": 0.7466382047438345, "learning_rate": 7.322596979370689e-06, "loss": 0.628, "step": 5743 }, { "epoch": 0.73, "grad_norm": 0.6334318570349889, "learning_rate": 7.321683395418691e-06, "loss": 0.541, "step": 5744 }, { "epoch": 0.73, "grad_norm": 0.5639089463078808, "learning_rate": 7.320769712640048e-06, "loss": 0.5475, "step": 5745 }, { "epoch": 0.73, "grad_norm": 1.0410641630002657, "learning_rate": 7.319855931073656e-06, "loss": 0.6268, "step": 5746 }, { "epoch": 0.73, "grad_norm": 0.9442298973443018, "learning_rate": 7.318942050758411e-06, "loss": 0.6891, "step": 5747 }, { "epoch": 0.73, "grad_norm": 0.6236351149290799, "learning_rate": 7.318028071733212e-06, "loss": 0.5682, "step": 5748 }, { "epoch": 0.73, "grad_norm": 0.6115715915318616, "learning_rate": 7.317113994036967e-06, "loss": 0.5258, "step": 5749 }, { "epoch": 0.73, "grad_norm": 0.5883876225640409, "learning_rate": 7.316199817708581e-06, "loss": 0.5212, "step": 5750 }, { "epoch": 0.73, "grad_norm": 0.5565977068422248, "learning_rate": 7.315285542786972e-06, "loss": 0.4633, "step": 5751 }, { "epoch": 0.73, "grad_norm": 0.5742294527078042, "learning_rate": 7.314371169311056e-06, "loss": 0.4993, "step": 5752 }, { "epoch": 0.73, "grad_norm": 0.7491745457961114, "learning_rate": 7.313456697319754e-06, "loss": 0.6878, "step": 5753 }, { "epoch": 0.73, "grad_norm": 0.6489124135714357, "learning_rate": 7.312542126851994e-06, "loss": 0.5235, "step": 5754 }, { "epoch": 0.73, "grad_norm": 0.662751538576484, "learning_rate": 7.311627457946705e-06, "loss": 0.5554, "step": 5755 }, { "epoch": 0.73, "grad_norm": 0.7297963370112601, "learning_rate": 7.310712690642823e-06, "loss": 0.5713, "step": 5756 }, { "epoch": 0.73, "grad_norm": 0.6276117480430634, "learning_rate": 7.309797824979283e-06, "loss": 0.6388, "step": 5757 }, { "epoch": 0.73, "grad_norm": 0.8929000216587889, "learning_rate": 7.308882860995034e-06, "loss": 0.6366, "step": 5758 }, { "epoch": 0.73, "grad_norm": 0.5414804000899441, "learning_rate": 7.307967798729019e-06, "loss": 0.4763, "step": 5759 }, { "epoch": 0.73, "grad_norm": 0.6689040892470147, "learning_rate": 7.307052638220189e-06, "loss": 0.5, "step": 5760 }, { "epoch": 0.73, "grad_norm": 0.6150035201513734, "learning_rate": 7.306137379507501e-06, "loss": 0.5282, "step": 5761 }, { "epoch": 0.73, "grad_norm": 0.6229377456891717, "learning_rate": 7.305222022629914e-06, "loss": 0.6035, "step": 5762 }, { "epoch": 0.73, "grad_norm": 0.5776227052583409, "learning_rate": 7.304306567626391e-06, "loss": 0.5294, "step": 5763 }, { "epoch": 0.73, "grad_norm": 0.58151431386689, "learning_rate": 7.3033910145359025e-06, "loss": 0.5283, "step": 5764 }, { "epoch": 0.73, "grad_norm": 0.6479035031175571, "learning_rate": 7.302475363397419e-06, "loss": 0.518, "step": 5765 }, { "epoch": 0.73, "grad_norm": 0.7282825080334686, "learning_rate": 7.301559614249915e-06, "loss": 0.5657, "step": 5766 }, { "epoch": 0.73, "grad_norm": 0.574330821080527, "learning_rate": 7.300643767132376e-06, "loss": 0.5208, "step": 5767 }, { "epoch": 0.73, "grad_norm": 0.736684147857944, "learning_rate": 7.299727822083782e-06, "loss": 0.6046, "step": 5768 }, { "epoch": 0.73, "grad_norm": 0.891767415071248, "learning_rate": 7.298811779143122e-06, "loss": 0.618, "step": 5769 }, { "epoch": 0.74, "grad_norm": 0.6490241774211478, "learning_rate": 7.297895638349392e-06, "loss": 0.5211, "step": 5770 }, { "epoch": 0.74, "grad_norm": 0.6065242278724827, "learning_rate": 7.2969793997415885e-06, "loss": 0.5445, "step": 5771 }, { "epoch": 0.74, "grad_norm": 0.5399168870417422, "learning_rate": 7.2960630633587115e-06, "loss": 0.503, "step": 5772 }, { "epoch": 0.74, "grad_norm": 0.7694682650140962, "learning_rate": 7.295146629239767e-06, "loss": 0.5799, "step": 5773 }, { "epoch": 0.74, "grad_norm": 0.748506408540628, "learning_rate": 7.2942300974237644e-06, "loss": 0.609, "step": 5774 }, { "epoch": 0.74, "grad_norm": 0.7446645910047759, "learning_rate": 7.293313467949719e-06, "loss": 0.5374, "step": 5775 }, { "epoch": 0.74, "grad_norm": 0.707044546472477, "learning_rate": 7.292396740856645e-06, "loss": 0.5104, "step": 5776 }, { "epoch": 0.74, "grad_norm": 0.5935709965582329, "learning_rate": 7.291479916183571e-06, "loss": 0.5092, "step": 5777 }, { "epoch": 0.74, "grad_norm": 0.6715587002930182, "learning_rate": 7.2905629939695165e-06, "loss": 0.5302, "step": 5778 }, { "epoch": 0.74, "grad_norm": 0.7167607621852933, "learning_rate": 7.289645974253517e-06, "loss": 0.5911, "step": 5779 }, { "epoch": 0.74, "grad_norm": 0.5748019431123031, "learning_rate": 7.288728857074603e-06, "loss": 0.552, "step": 5780 }, { "epoch": 0.74, "grad_norm": 0.5809459574804884, "learning_rate": 7.2878116424718156e-06, "loss": 0.5267, "step": 5781 }, { "epoch": 0.74, "grad_norm": 0.7847818709982552, "learning_rate": 7.286894330484199e-06, "loss": 0.6018, "step": 5782 }, { "epoch": 0.74, "grad_norm": 0.7157408343451143, "learning_rate": 7.285976921150797e-06, "loss": 0.5727, "step": 5783 }, { "epoch": 0.74, "grad_norm": 0.597567504021634, "learning_rate": 7.285059414510662e-06, "loss": 0.5119, "step": 5784 }, { "epoch": 0.74, "grad_norm": 0.5660599563764835, "learning_rate": 7.284141810602851e-06, "loss": 0.4974, "step": 5785 }, { "epoch": 0.74, "grad_norm": 0.7149585941121873, "learning_rate": 7.283224109466422e-06, "loss": 0.5979, "step": 5786 }, { "epoch": 0.74, "grad_norm": 0.7330929300573952, "learning_rate": 7.282306311140439e-06, "loss": 0.6008, "step": 5787 }, { "epoch": 0.74, "grad_norm": 0.5428974476513099, "learning_rate": 7.281388415663969e-06, "loss": 0.5337, "step": 5788 }, { "epoch": 0.74, "grad_norm": 0.5691331466082877, "learning_rate": 7.280470423076085e-06, "loss": 0.5148, "step": 5789 }, { "epoch": 0.74, "grad_norm": 0.6695316733450618, "learning_rate": 7.279552333415862e-06, "loss": 0.5428, "step": 5790 }, { "epoch": 0.74, "grad_norm": 0.7811793793546558, "learning_rate": 7.278634146722381e-06, "loss": 0.5439, "step": 5791 }, { "epoch": 0.74, "grad_norm": 0.7569573819473664, "learning_rate": 7.277715863034725e-06, "loss": 0.6132, "step": 5792 }, { "epoch": 0.74, "grad_norm": 0.6489098647452222, "learning_rate": 7.276797482391985e-06, "loss": 0.5182, "step": 5793 }, { "epoch": 0.74, "grad_norm": 0.8661039160399061, "learning_rate": 7.27587900483325e-06, "loss": 0.6641, "step": 5794 }, { "epoch": 0.74, "grad_norm": 0.5820186757358307, "learning_rate": 7.274960430397618e-06, "loss": 0.5221, "step": 5795 }, { "epoch": 0.74, "grad_norm": 0.6325369896018755, "learning_rate": 7.274041759124192e-06, "loss": 0.5318, "step": 5796 }, { "epoch": 0.74, "grad_norm": 0.7142674492910979, "learning_rate": 7.273122991052074e-06, "loss": 0.6028, "step": 5797 }, { "epoch": 0.74, "grad_norm": 0.5693571249743329, "learning_rate": 7.272204126220375e-06, "loss": 0.5323, "step": 5798 }, { "epoch": 0.74, "grad_norm": 0.5726341623341924, "learning_rate": 7.271285164668207e-06, "loss": 0.4993, "step": 5799 }, { "epoch": 0.74, "grad_norm": 0.5720652830387938, "learning_rate": 7.270366106434687e-06, "loss": 0.5106, "step": 5800 }, { "epoch": 0.74, "grad_norm": 0.5975392123756584, "learning_rate": 7.269446951558936e-06, "loss": 0.5266, "step": 5801 }, { "epoch": 0.74, "grad_norm": 0.6988535752173115, "learning_rate": 7.268527700080081e-06, "loss": 0.5369, "step": 5802 }, { "epoch": 0.74, "grad_norm": 0.6046355646140684, "learning_rate": 7.267608352037252e-06, "loss": 0.4992, "step": 5803 }, { "epoch": 0.74, "grad_norm": 0.6186768633041764, "learning_rate": 7.266688907469582e-06, "loss": 0.5125, "step": 5804 }, { "epoch": 0.74, "grad_norm": 0.7500248355786981, "learning_rate": 7.2657693664162074e-06, "loss": 0.5671, "step": 5805 }, { "epoch": 0.74, "grad_norm": 0.7928151669816452, "learning_rate": 7.264849728916272e-06, "loss": 0.6154, "step": 5806 }, { "epoch": 0.74, "grad_norm": 1.0398998040134704, "learning_rate": 7.263929995008921e-06, "loss": 0.6265, "step": 5807 }, { "epoch": 0.74, "grad_norm": 0.6269689146676133, "learning_rate": 7.263010164733303e-06, "loss": 0.4923, "step": 5808 }, { "epoch": 0.74, "grad_norm": 0.7283140190877309, "learning_rate": 7.262090238128575e-06, "loss": 0.5979, "step": 5809 }, { "epoch": 0.74, "grad_norm": 0.836802862033315, "learning_rate": 7.261170215233895e-06, "loss": 0.6063, "step": 5810 }, { "epoch": 0.74, "grad_norm": 0.8295010847206123, "learning_rate": 7.2602500960884235e-06, "loss": 0.6066, "step": 5811 }, { "epoch": 0.74, "grad_norm": 0.7460122452114994, "learning_rate": 7.259329880731328e-06, "loss": 0.6002, "step": 5812 }, { "epoch": 0.74, "grad_norm": 0.5560955965994837, "learning_rate": 7.2584095692017795e-06, "loss": 0.5122, "step": 5813 }, { "epoch": 0.74, "grad_norm": 0.7079701401655037, "learning_rate": 7.257489161538953e-06, "loss": 0.5133, "step": 5814 }, { "epoch": 0.74, "grad_norm": 1.142037142805699, "learning_rate": 7.256568657782026e-06, "loss": 0.5646, "step": 5815 }, { "epoch": 0.74, "grad_norm": 0.6196514172605062, "learning_rate": 7.255648057970184e-06, "loss": 0.5278, "step": 5816 }, { "epoch": 0.74, "grad_norm": 0.660998859609431, "learning_rate": 7.254727362142611e-06, "loss": 0.5407, "step": 5817 }, { "epoch": 0.74, "grad_norm": 0.7939894477975382, "learning_rate": 7.253806570338499e-06, "loss": 0.6261, "step": 5818 }, { "epoch": 0.74, "grad_norm": 0.7881642132198244, "learning_rate": 7.252885682597041e-06, "loss": 0.5966, "step": 5819 }, { "epoch": 0.74, "grad_norm": 0.5466048211434927, "learning_rate": 7.2519646989574435e-06, "loss": 0.5176, "step": 5820 }, { "epoch": 0.74, "grad_norm": 0.8007518272564078, "learning_rate": 7.251043619458902e-06, "loss": 0.5782, "step": 5821 }, { "epoch": 0.74, "grad_norm": 0.7113264111127049, "learning_rate": 7.250122444140628e-06, "loss": 0.6143, "step": 5822 }, { "epoch": 0.74, "grad_norm": 0.5343792154273836, "learning_rate": 7.249201173041832e-06, "loss": 0.4949, "step": 5823 }, { "epoch": 0.74, "grad_norm": 0.6462591485954078, "learning_rate": 7.248279806201729e-06, "loss": 0.5439, "step": 5824 }, { "epoch": 0.74, "grad_norm": 0.70629652786406, "learning_rate": 7.2473583436595384e-06, "loss": 0.5427, "step": 5825 }, { "epoch": 0.74, "grad_norm": 0.633347607682042, "learning_rate": 7.246436785454486e-06, "loss": 0.5787, "step": 5826 }, { "epoch": 0.74, "grad_norm": 0.7389954359806247, "learning_rate": 7.2455151316257975e-06, "loss": 0.5875, "step": 5827 }, { "epoch": 0.74, "grad_norm": 0.8314776720107688, "learning_rate": 7.244593382212706e-06, "loss": 0.5708, "step": 5828 }, { "epoch": 0.74, "grad_norm": 0.5866210514358545, "learning_rate": 7.243671537254446e-06, "loss": 0.5067, "step": 5829 }, { "epoch": 0.74, "grad_norm": 0.6535199784271271, "learning_rate": 7.24274959679026e-06, "loss": 0.5729, "step": 5830 }, { "epoch": 0.74, "grad_norm": 0.7643511709475582, "learning_rate": 7.241827560859388e-06, "loss": 0.5796, "step": 5831 }, { "epoch": 0.74, "grad_norm": 0.8033218630139487, "learning_rate": 7.240905429501083e-06, "loss": 0.6167, "step": 5832 }, { "epoch": 0.74, "grad_norm": 2.264851543870776, "learning_rate": 7.239983202754594e-06, "loss": 0.5799, "step": 5833 }, { "epoch": 0.74, "grad_norm": 0.6654171955223167, "learning_rate": 7.239060880659177e-06, "loss": 0.5984, "step": 5834 }, { "epoch": 0.74, "grad_norm": 0.635724976641325, "learning_rate": 7.238138463254095e-06, "loss": 0.4765, "step": 5835 }, { "epoch": 0.74, "grad_norm": 0.6263943895827097, "learning_rate": 7.23721595057861e-06, "loss": 0.6167, "step": 5836 }, { "epoch": 0.74, "grad_norm": 0.7289171218932294, "learning_rate": 7.2362933426719905e-06, "loss": 0.5762, "step": 5837 }, { "epoch": 0.74, "grad_norm": 0.6279899523271233, "learning_rate": 7.235370639573509e-06, "loss": 0.5283, "step": 5838 }, { "epoch": 0.74, "grad_norm": 0.710489049279531, "learning_rate": 7.234447841322445e-06, "loss": 0.517, "step": 5839 }, { "epoch": 0.74, "grad_norm": 0.6623958418177638, "learning_rate": 7.233524947958075e-06, "loss": 0.5515, "step": 5840 }, { "epoch": 0.74, "grad_norm": 0.6971840183623577, "learning_rate": 7.232601959519685e-06, "loss": 0.5163, "step": 5841 }, { "epoch": 0.74, "grad_norm": 0.631580799135298, "learning_rate": 7.231678876046565e-06, "loss": 0.5337, "step": 5842 }, { "epoch": 0.74, "grad_norm": 0.8002069483257911, "learning_rate": 7.230755697578007e-06, "loss": 0.6165, "step": 5843 }, { "epoch": 0.74, "grad_norm": 0.8090719609189326, "learning_rate": 7.229832424153307e-06, "loss": 0.6482, "step": 5844 }, { "epoch": 0.74, "grad_norm": 0.6438418091126863, "learning_rate": 7.228909055811766e-06, "loss": 0.547, "step": 5845 }, { "epoch": 0.74, "grad_norm": 0.6686358657763787, "learning_rate": 7.227985592592688e-06, "loss": 0.606, "step": 5846 }, { "epoch": 0.74, "grad_norm": 0.5883747889183144, "learning_rate": 7.227062034535384e-06, "loss": 0.5372, "step": 5847 }, { "epoch": 0.75, "grad_norm": 0.7887012703417479, "learning_rate": 7.226138381679165e-06, "loss": 0.6289, "step": 5848 }, { "epoch": 0.75, "grad_norm": 0.8359052276538436, "learning_rate": 7.22521463406335e-06, "loss": 0.6622, "step": 5849 }, { "epoch": 0.75, "grad_norm": 0.8624524633674899, "learning_rate": 7.224290791727259e-06, "loss": 0.6222, "step": 5850 }, { "epoch": 0.75, "grad_norm": 0.6247450915222917, "learning_rate": 7.223366854710216e-06, "loss": 0.5271, "step": 5851 }, { "epoch": 0.75, "grad_norm": 0.7475055977408548, "learning_rate": 7.222442823051552e-06, "loss": 0.6031, "step": 5852 }, { "epoch": 0.75, "grad_norm": 0.7956688269029737, "learning_rate": 7.221518696790597e-06, "loss": 0.621, "step": 5853 }, { "epoch": 0.75, "grad_norm": 0.5937287743492685, "learning_rate": 7.220594475966691e-06, "loss": 0.5155, "step": 5854 }, { "epoch": 0.75, "grad_norm": 0.5664963442322977, "learning_rate": 7.219670160619174e-06, "loss": 0.5153, "step": 5855 }, { "epoch": 0.75, "grad_norm": 0.9856919355818669, "learning_rate": 7.218745750787392e-06, "loss": 0.6404, "step": 5856 }, { "epoch": 0.75, "grad_norm": 0.6901444752687154, "learning_rate": 7.217821246510692e-06, "loss": 0.565, "step": 5857 }, { "epoch": 0.75, "grad_norm": 0.7678632427900924, "learning_rate": 7.216896647828431e-06, "loss": 0.6458, "step": 5858 }, { "epoch": 0.75, "grad_norm": 0.6206510986093188, "learning_rate": 7.215971954779962e-06, "loss": 0.5602, "step": 5859 }, { "epoch": 0.75, "grad_norm": 0.7804973788476395, "learning_rate": 7.21504716740465e-06, "loss": 0.6281, "step": 5860 }, { "epoch": 0.75, "grad_norm": 0.7043479877195835, "learning_rate": 7.2141222857418555e-06, "loss": 0.5742, "step": 5861 }, { "epoch": 0.75, "grad_norm": 0.7256562511537287, "learning_rate": 7.213197309830953e-06, "loss": 0.5702, "step": 5862 }, { "epoch": 0.75, "grad_norm": 0.6173465471508519, "learning_rate": 7.212272239711312e-06, "loss": 0.5248, "step": 5863 }, { "epoch": 0.75, "grad_norm": 0.6334666783535389, "learning_rate": 7.211347075422312e-06, "loss": 0.5136, "step": 5864 }, { "epoch": 0.75, "grad_norm": 0.7329613183016386, "learning_rate": 7.2104218170033325e-06, "loss": 0.5893, "step": 5865 }, { "epoch": 0.75, "grad_norm": 0.8142819818199812, "learning_rate": 7.209496464493762e-06, "loss": 0.6028, "step": 5866 }, { "epoch": 0.75, "grad_norm": 0.6844891015001855, "learning_rate": 7.208571017932985e-06, "loss": 0.5128, "step": 5867 }, { "epoch": 0.75, "grad_norm": 0.8260304063851073, "learning_rate": 7.207645477360399e-06, "loss": 0.5968, "step": 5868 }, { "epoch": 0.75, "grad_norm": 0.6660889071416392, "learning_rate": 7.206719842815398e-06, "loss": 0.5666, "step": 5869 }, { "epoch": 0.75, "grad_norm": 0.9208399051921774, "learning_rate": 7.205794114337385e-06, "loss": 0.6336, "step": 5870 }, { "epoch": 0.75, "grad_norm": 0.6119512037323821, "learning_rate": 7.204868291965767e-06, "loss": 0.544, "step": 5871 }, { "epoch": 0.75, "grad_norm": 0.6525157575923063, "learning_rate": 7.203942375739951e-06, "loss": 0.575, "step": 5872 }, { "epoch": 0.75, "grad_norm": 0.6777254365784581, "learning_rate": 7.2030163656993505e-06, "loss": 0.5898, "step": 5873 }, { "epoch": 0.75, "grad_norm": 0.5781114453839473, "learning_rate": 7.2020902618833836e-06, "loss": 0.535, "step": 5874 }, { "epoch": 0.75, "grad_norm": 0.7283837867809089, "learning_rate": 7.201164064331469e-06, "loss": 0.6501, "step": 5875 }, { "epoch": 0.75, "grad_norm": 0.6009244489423754, "learning_rate": 7.200237773083036e-06, "loss": 0.5303, "step": 5876 }, { "epoch": 0.75, "grad_norm": 0.7275894205842025, "learning_rate": 7.199311388177512e-06, "loss": 0.4724, "step": 5877 }, { "epoch": 0.75, "grad_norm": 0.643228658523126, "learning_rate": 7.198384909654331e-06, "loss": 0.5359, "step": 5878 }, { "epoch": 0.75, "grad_norm": 0.7513516441067521, "learning_rate": 7.197458337552928e-06, "loss": 0.605, "step": 5879 }, { "epoch": 0.75, "grad_norm": 0.6258683148799139, "learning_rate": 7.1965316719127465e-06, "loss": 0.5499, "step": 5880 }, { "epoch": 0.75, "grad_norm": 0.7772835189884585, "learning_rate": 7.1956049127732295e-06, "loss": 0.5744, "step": 5881 }, { "epoch": 0.75, "grad_norm": 0.6216236494210087, "learning_rate": 7.1946780601738285e-06, "loss": 0.5491, "step": 5882 }, { "epoch": 0.75, "grad_norm": 0.5663351592564475, "learning_rate": 7.1937511141539965e-06, "loss": 0.5244, "step": 5883 }, { "epoch": 0.75, "grad_norm": 0.7554566424136938, "learning_rate": 7.192824074753188e-06, "loss": 0.5798, "step": 5884 }, { "epoch": 0.75, "grad_norm": 0.7613029883601454, "learning_rate": 7.191896942010867e-06, "loss": 0.599, "step": 5885 }, { "epoch": 0.75, "grad_norm": 0.8065786710297403, "learning_rate": 7.190969715966498e-06, "loss": 0.6365, "step": 5886 }, { "epoch": 0.75, "grad_norm": 0.5736921731008394, "learning_rate": 7.190042396659548e-06, "loss": 0.5074, "step": 5887 }, { "epoch": 0.75, "grad_norm": 0.6962951030751786, "learning_rate": 7.189114984129492e-06, "loss": 0.6247, "step": 5888 }, { "epoch": 0.75, "grad_norm": 0.6855375517979899, "learning_rate": 7.1881874784158065e-06, "loss": 0.5474, "step": 5889 }, { "epoch": 0.75, "grad_norm": 0.7174166678113488, "learning_rate": 7.187259879557974e-06, "loss": 0.5967, "step": 5890 }, { "epoch": 0.75, "grad_norm": 0.746697031944723, "learning_rate": 7.186332187595477e-06, "loss": 0.5463, "step": 5891 }, { "epoch": 0.75, "grad_norm": 0.6994200247041723, "learning_rate": 7.185404402567805e-06, "loss": 0.6516, "step": 5892 }, { "epoch": 0.75, "grad_norm": 0.8037768005668379, "learning_rate": 7.18447652451445e-06, "loss": 0.5931, "step": 5893 }, { "epoch": 0.75, "grad_norm": 0.8136246375220463, "learning_rate": 7.183548553474912e-06, "loss": 0.5129, "step": 5894 }, { "epoch": 0.75, "grad_norm": 0.5920517296472912, "learning_rate": 7.182620489488689e-06, "loss": 0.5559, "step": 5895 }, { "epoch": 0.75, "grad_norm": 0.7856583055030771, "learning_rate": 7.181692332595286e-06, "loss": 0.6187, "step": 5896 }, { "epoch": 0.75, "grad_norm": 0.8117548383096944, "learning_rate": 7.180764082834213e-06, "loss": 0.6325, "step": 5897 }, { "epoch": 0.75, "grad_norm": 0.8346757934061227, "learning_rate": 7.1798357402449805e-06, "loss": 0.6566, "step": 5898 }, { "epoch": 0.75, "grad_norm": 0.7666718394886585, "learning_rate": 7.178907304867108e-06, "loss": 0.6011, "step": 5899 }, { "epoch": 0.75, "grad_norm": 0.6797956495444377, "learning_rate": 7.177978776740112e-06, "loss": 0.5266, "step": 5900 }, { "epoch": 0.75, "grad_norm": 0.5985809901293472, "learning_rate": 7.17705015590352e-06, "loss": 0.5502, "step": 5901 }, { "epoch": 0.75, "grad_norm": 0.6179930592407553, "learning_rate": 7.176121442396861e-06, "loss": 0.5399, "step": 5902 }, { "epoch": 0.75, "grad_norm": 0.887166665193454, "learning_rate": 7.175192636259666e-06, "loss": 0.6518, "step": 5903 }, { "epoch": 0.75, "grad_norm": 0.6767972616304102, "learning_rate": 7.174263737531471e-06, "loss": 0.5321, "step": 5904 }, { "epoch": 0.75, "grad_norm": 0.7545027952937109, "learning_rate": 7.173334746251815e-06, "loss": 0.5955, "step": 5905 }, { "epoch": 0.75, "grad_norm": 0.694087376964566, "learning_rate": 7.172405662460247e-06, "loss": 0.6055, "step": 5906 }, { "epoch": 0.75, "grad_norm": 0.5405844381160699, "learning_rate": 7.1714764861963095e-06, "loss": 0.5076, "step": 5907 }, { "epoch": 0.75, "grad_norm": 0.6662841454229068, "learning_rate": 7.170547217499557e-06, "loss": 0.5525, "step": 5908 }, { "epoch": 0.75, "grad_norm": 0.6366622953428074, "learning_rate": 7.169617856409547e-06, "loss": 0.5104, "step": 5909 }, { "epoch": 0.75, "grad_norm": 0.7557653178685522, "learning_rate": 7.168688402965837e-06, "loss": 0.6106, "step": 5910 }, { "epoch": 0.75, "grad_norm": 0.5698851246736489, "learning_rate": 7.167758857207992e-06, "loss": 0.5251, "step": 5911 }, { "epoch": 0.75, "grad_norm": 0.6579949979231071, "learning_rate": 7.16682921917558e-06, "loss": 0.4939, "step": 5912 }, { "epoch": 0.75, "grad_norm": 1.9267038564495211, "learning_rate": 7.165899488908171e-06, "loss": 0.6138, "step": 5913 }, { "epoch": 0.75, "grad_norm": 0.6137144751518273, "learning_rate": 7.1649696664453435e-06, "loss": 0.4945, "step": 5914 }, { "epoch": 0.75, "grad_norm": 0.6336469828178837, "learning_rate": 7.164039751826675e-06, "loss": 0.5368, "step": 5915 }, { "epoch": 0.75, "grad_norm": 0.5332662473473622, "learning_rate": 7.16310974509175e-06, "loss": 0.5095, "step": 5916 }, { "epoch": 0.75, "grad_norm": 0.6380657288145953, "learning_rate": 7.162179646280155e-06, "loss": 0.5039, "step": 5917 }, { "epoch": 0.75, "grad_norm": 0.5217335558926929, "learning_rate": 7.161249455431481e-06, "loss": 0.5023, "step": 5918 }, { "epoch": 0.75, "grad_norm": 0.7476408907405009, "learning_rate": 7.160319172585325e-06, "loss": 0.6326, "step": 5919 }, { "epoch": 0.75, "grad_norm": 0.8369853122374212, "learning_rate": 7.159388797781285e-06, "loss": 0.6343, "step": 5920 }, { "epoch": 0.75, "grad_norm": 0.8397283935017545, "learning_rate": 7.1584583310589635e-06, "loss": 0.6006, "step": 5921 }, { "epoch": 0.75, "grad_norm": 0.686576869167361, "learning_rate": 7.15752777245797e-06, "loss": 0.5358, "step": 5922 }, { "epoch": 0.75, "grad_norm": 0.8897349901289278, "learning_rate": 7.156597122017913e-06, "loss": 0.6119, "step": 5923 }, { "epoch": 0.75, "grad_norm": 0.6827827637178683, "learning_rate": 7.155666379778407e-06, "loss": 0.5499, "step": 5924 }, { "epoch": 0.75, "grad_norm": 0.6486559097349742, "learning_rate": 7.154735545779072e-06, "loss": 0.4898, "step": 5925 }, { "epoch": 0.75, "grad_norm": 1.4057449600176155, "learning_rate": 7.153804620059532e-06, "loss": 0.6217, "step": 5926 }, { "epoch": 0.76, "grad_norm": 0.5506670451254673, "learning_rate": 7.152873602659411e-06, "loss": 0.5245, "step": 5927 }, { "epoch": 0.76, "grad_norm": 0.6546870274474008, "learning_rate": 7.15194249361834e-06, "loss": 0.5798, "step": 5928 }, { "epoch": 0.76, "grad_norm": 0.7520327982372209, "learning_rate": 7.1510112929759544e-06, "loss": 0.6366, "step": 5929 }, { "epoch": 0.76, "grad_norm": 0.6011870153450867, "learning_rate": 7.150080000771892e-06, "loss": 0.5259, "step": 5930 }, { "epoch": 0.76, "grad_norm": 0.5643861021086324, "learning_rate": 7.149148617045793e-06, "loss": 0.5191, "step": 5931 }, { "epoch": 0.76, "grad_norm": 0.6655891581971597, "learning_rate": 7.1482171418373055e-06, "loss": 0.5382, "step": 5932 }, { "epoch": 0.76, "grad_norm": 0.7097024475756677, "learning_rate": 7.14728557518608e-06, "loss": 0.5988, "step": 5933 }, { "epoch": 0.76, "grad_norm": 0.5814307284642501, "learning_rate": 7.146353917131767e-06, "loss": 0.5127, "step": 5934 }, { "epoch": 0.76, "grad_norm": 0.8005391443407877, "learning_rate": 7.145422167714029e-06, "loss": 0.6022, "step": 5935 }, { "epoch": 0.76, "grad_norm": 0.7009976228365044, "learning_rate": 7.144490326972525e-06, "loss": 0.55, "step": 5936 }, { "epoch": 0.76, "grad_norm": 0.6720608201194977, "learning_rate": 7.143558394946919e-06, "loss": 0.601, "step": 5937 }, { "epoch": 0.76, "grad_norm": 0.9116088358992631, "learning_rate": 7.142626371676883e-06, "loss": 0.6073, "step": 5938 }, { "epoch": 0.76, "grad_norm": 0.6275254875012132, "learning_rate": 7.14169425720209e-06, "loss": 0.527, "step": 5939 }, { "epoch": 0.76, "grad_norm": 3.5094376765807014, "learning_rate": 7.140762051562215e-06, "loss": 0.6293, "step": 5940 }, { "epoch": 0.76, "grad_norm": 0.6549302507765816, "learning_rate": 7.139829754796941e-06, "loss": 0.5013, "step": 5941 }, { "epoch": 0.76, "grad_norm": 0.612729566792426, "learning_rate": 7.1388973669459515e-06, "loss": 0.5466, "step": 5942 }, { "epoch": 0.76, "grad_norm": 0.7536579417526534, "learning_rate": 7.137964888048936e-06, "loss": 0.6376, "step": 5943 }, { "epoch": 0.76, "grad_norm": 0.9117217692848721, "learning_rate": 7.137032318145588e-06, "loss": 0.6506, "step": 5944 }, { "epoch": 0.76, "grad_norm": 0.7630075700909473, "learning_rate": 7.1360996572756034e-06, "loss": 0.6294, "step": 5945 }, { "epoch": 0.76, "grad_norm": 0.6414206866660678, "learning_rate": 7.1351669054786825e-06, "loss": 0.552, "step": 5946 }, { "epoch": 0.76, "grad_norm": 0.7169280605097303, "learning_rate": 7.134234062794528e-06, "loss": 0.6154, "step": 5947 }, { "epoch": 0.76, "grad_norm": 0.5957026845046108, "learning_rate": 7.1333011292628506e-06, "loss": 0.5875, "step": 5948 }, { "epoch": 0.76, "grad_norm": 0.6799298689048163, "learning_rate": 7.132368104923359e-06, "loss": 0.5323, "step": 5949 }, { "epoch": 0.76, "grad_norm": 0.6339070219489517, "learning_rate": 7.131434989815774e-06, "loss": 0.5409, "step": 5950 }, { "epoch": 0.76, "grad_norm": 0.8221592820088667, "learning_rate": 7.130501783979813e-06, "loss": 0.6135, "step": 5951 }, { "epoch": 0.76, "grad_norm": 0.7197916486344914, "learning_rate": 7.129568487455197e-06, "loss": 0.5424, "step": 5952 }, { "epoch": 0.76, "grad_norm": 0.7347070262150646, "learning_rate": 7.128635100281657e-06, "loss": 0.5833, "step": 5953 }, { "epoch": 0.76, "grad_norm": 0.7357879577306148, "learning_rate": 7.1277016224989224e-06, "loss": 0.5877, "step": 5954 }, { "epoch": 0.76, "grad_norm": 0.8899153229202661, "learning_rate": 7.126768054146729e-06, "loss": 0.621, "step": 5955 }, { "epoch": 0.76, "grad_norm": 0.8080942458176608, "learning_rate": 7.125834395264815e-06, "loss": 0.6003, "step": 5956 }, { "epoch": 0.76, "grad_norm": 0.7409659378027762, "learning_rate": 7.124900645892925e-06, "loss": 0.5787, "step": 5957 }, { "epoch": 0.76, "grad_norm": 0.5636619965707601, "learning_rate": 7.123966806070804e-06, "loss": 0.5148, "step": 5958 }, { "epoch": 0.76, "grad_norm": 0.6939137855049864, "learning_rate": 7.123032875838204e-06, "loss": 0.5317, "step": 5959 }, { "epoch": 0.76, "grad_norm": 0.6169178512654427, "learning_rate": 7.122098855234879e-06, "loss": 0.5068, "step": 5960 }, { "epoch": 0.76, "grad_norm": 0.7670689037036561, "learning_rate": 7.121164744300586e-06, "loss": 0.6026, "step": 5961 }, { "epoch": 0.76, "grad_norm": 0.5868224830446753, "learning_rate": 7.120230543075088e-06, "loss": 0.5358, "step": 5962 }, { "epoch": 0.76, "grad_norm": 0.5930265037171247, "learning_rate": 7.119296251598152e-06, "loss": 0.566, "step": 5963 }, { "epoch": 0.76, "grad_norm": 0.5908456018357738, "learning_rate": 7.118361869909547e-06, "loss": 0.5208, "step": 5964 }, { "epoch": 0.76, "grad_norm": 0.5593592665307044, "learning_rate": 7.117427398049045e-06, "loss": 0.5258, "step": 5965 }, { "epoch": 0.76, "grad_norm": 0.65059181906227, "learning_rate": 7.116492836056427e-06, "loss": 0.5709, "step": 5966 }, { "epoch": 0.76, "grad_norm": 0.7629845456020955, "learning_rate": 7.11555818397147e-06, "loss": 0.5841, "step": 5967 }, { "epoch": 0.76, "grad_norm": 0.7274199972082073, "learning_rate": 7.1146234418339635e-06, "loss": 0.6031, "step": 5968 }, { "epoch": 0.76, "grad_norm": 0.5852919273140857, "learning_rate": 7.1136886096836935e-06, "loss": 0.5653, "step": 5969 }, { "epoch": 0.76, "grad_norm": 0.7087866747568418, "learning_rate": 7.112753687560454e-06, "loss": 0.5917, "step": 5970 }, { "epoch": 0.76, "grad_norm": 0.56202775250031, "learning_rate": 7.111818675504041e-06, "loss": 0.5322, "step": 5971 }, { "epoch": 0.76, "grad_norm": 0.5920179015436181, "learning_rate": 7.1108835735542556e-06, "loss": 0.5292, "step": 5972 }, { "epoch": 0.76, "grad_norm": 0.7633463885951367, "learning_rate": 7.109948381750902e-06, "loss": 0.5593, "step": 5973 }, { "epoch": 0.76, "grad_norm": 0.6123231443472408, "learning_rate": 7.109013100133789e-06, "loss": 0.5001, "step": 5974 }, { "epoch": 0.76, "grad_norm": 0.8101548896370763, "learning_rate": 7.108077728742727e-06, "loss": 0.6194, "step": 5975 }, { "epoch": 0.76, "grad_norm": 0.6193684625645861, "learning_rate": 7.107142267617533e-06, "loss": 0.5115, "step": 5976 }, { "epoch": 0.76, "grad_norm": 0.5573063001403132, "learning_rate": 7.106206716798025e-06, "loss": 0.5082, "step": 5977 }, { "epoch": 0.76, "grad_norm": 0.7036027776981749, "learning_rate": 7.105271076324028e-06, "loss": 0.5874, "step": 5978 }, { "epoch": 0.76, "grad_norm": 0.652049101670856, "learning_rate": 7.10433534623537e-06, "loss": 0.5015, "step": 5979 }, { "epoch": 0.76, "grad_norm": 0.5763090882467994, "learning_rate": 7.103399526571879e-06, "loss": 0.5307, "step": 5980 }, { "epoch": 0.76, "grad_norm": 0.674680021102911, "learning_rate": 7.102463617373392e-06, "loss": 0.5083, "step": 5981 }, { "epoch": 0.76, "grad_norm": 0.6330173496203505, "learning_rate": 7.101527618679749e-06, "loss": 0.5371, "step": 5982 }, { "epoch": 0.76, "grad_norm": 0.7500296472281367, "learning_rate": 7.10059153053079e-06, "loss": 0.4961, "step": 5983 }, { "epoch": 0.76, "grad_norm": 0.7230364666967767, "learning_rate": 7.099655352966361e-06, "loss": 0.565, "step": 5984 }, { "epoch": 0.76, "grad_norm": 0.7281334536321968, "learning_rate": 7.098719086026313e-06, "loss": 0.5901, "step": 5985 }, { "epoch": 0.76, "grad_norm": 0.5669714110538367, "learning_rate": 7.097782729750501e-06, "loss": 0.552, "step": 5986 }, { "epoch": 0.76, "grad_norm": 0.5776175088594866, "learning_rate": 7.0968462841787825e-06, "loss": 0.5161, "step": 5987 }, { "epoch": 0.76, "grad_norm": 0.8035701307743665, "learning_rate": 7.095909749351016e-06, "loss": 0.5945, "step": 5988 }, { "epoch": 0.76, "grad_norm": 0.5749391162769852, "learning_rate": 7.094973125307071e-06, "loss": 0.5199, "step": 5989 }, { "epoch": 0.76, "grad_norm": 0.6034501105002962, "learning_rate": 7.094036412086815e-06, "loss": 0.5526, "step": 5990 }, { "epoch": 0.76, "grad_norm": 0.6351080516770963, "learning_rate": 7.093099609730118e-06, "loss": 0.584, "step": 5991 }, { "epoch": 0.76, "grad_norm": 0.616218268716067, "learning_rate": 7.092162718276861e-06, "loss": 0.5596, "step": 5992 }, { "epoch": 0.76, "grad_norm": 0.6133377103476508, "learning_rate": 7.091225737766923e-06, "loss": 0.549, "step": 5993 }, { "epoch": 0.76, "grad_norm": 0.6974070658555137, "learning_rate": 7.090288668240187e-06, "loss": 0.5781, "step": 5994 }, { "epoch": 0.76, "grad_norm": 0.6675311153928515, "learning_rate": 7.089351509736543e-06, "loss": 0.5763, "step": 5995 }, { "epoch": 0.76, "grad_norm": 0.756118850204332, "learning_rate": 7.0884142622958805e-06, "loss": 0.6154, "step": 5996 }, { "epoch": 0.76, "grad_norm": 0.6698436678644841, "learning_rate": 7.087476925958098e-06, "loss": 0.5872, "step": 5997 }, { "epoch": 0.76, "grad_norm": 0.5878670451744555, "learning_rate": 7.086539500763092e-06, "loss": 0.5009, "step": 5998 }, { "epoch": 0.76, "grad_norm": 0.6414485430906762, "learning_rate": 7.085601986750767e-06, "loss": 0.5466, "step": 5999 }, { "epoch": 0.76, "grad_norm": 0.7238108126282299, "learning_rate": 7.084664383961031e-06, "loss": 0.5699, "step": 6000 }, { "epoch": 0.76, "grad_norm": 0.6678033522689986, "learning_rate": 7.083726692433793e-06, "loss": 0.6002, "step": 6001 }, { "epoch": 0.76, "grad_norm": 0.7837549105544315, "learning_rate": 7.08278891220897e-06, "loss": 0.6126, "step": 6002 }, { "epoch": 0.76, "grad_norm": 0.6074108868955742, "learning_rate": 7.081851043326477e-06, "loss": 0.5207, "step": 6003 }, { "epoch": 0.76, "grad_norm": 0.7707852242079446, "learning_rate": 7.0809130858262385e-06, "loss": 0.5875, "step": 6004 }, { "epoch": 0.77, "grad_norm": 0.8314941488122427, "learning_rate": 7.079975039748179e-06, "loss": 0.681, "step": 6005 }, { "epoch": 0.77, "grad_norm": 0.7864963641232625, "learning_rate": 7.0790369051322285e-06, "loss": 0.6382, "step": 6006 }, { "epoch": 0.77, "grad_norm": 0.545907604352695, "learning_rate": 7.0780986820183215e-06, "loss": 0.5221, "step": 6007 }, { "epoch": 0.77, "grad_norm": 0.8486654749552658, "learning_rate": 7.077160370446395e-06, "loss": 0.6261, "step": 6008 }, { "epoch": 0.77, "grad_norm": 0.8072855106678415, "learning_rate": 7.076221970456387e-06, "loss": 0.6484, "step": 6009 }, { "epoch": 0.77, "grad_norm": 0.5287191367170836, "learning_rate": 7.075283482088246e-06, "loss": 0.4511, "step": 6010 }, { "epoch": 0.77, "grad_norm": 0.5741171173985696, "learning_rate": 7.074344905381917e-06, "loss": 0.5015, "step": 6011 }, { "epoch": 0.77, "grad_norm": 1.1406092082545656, "learning_rate": 7.073406240377356e-06, "loss": 0.5978, "step": 6012 }, { "epoch": 0.77, "grad_norm": 0.7210962449068865, "learning_rate": 7.072467487114516e-06, "loss": 0.5889, "step": 6013 }, { "epoch": 0.77, "grad_norm": 0.9730920088064027, "learning_rate": 7.071528645633358e-06, "loss": 0.6462, "step": 6014 }, { "epoch": 0.77, "grad_norm": 0.5686619356341395, "learning_rate": 7.070589715973845e-06, "loss": 0.5222, "step": 6015 }, { "epoch": 0.77, "grad_norm": 0.582273852585019, "learning_rate": 7.069650698175945e-06, "loss": 0.5512, "step": 6016 }, { "epoch": 0.77, "grad_norm": 0.5794857780835623, "learning_rate": 7.068711592279628e-06, "loss": 0.5046, "step": 6017 }, { "epoch": 0.77, "grad_norm": 0.8308853893111957, "learning_rate": 7.067772398324869e-06, "loss": 0.6769, "step": 6018 }, { "epoch": 0.77, "grad_norm": 0.8871208267240814, "learning_rate": 7.0668331163516475e-06, "loss": 0.6243, "step": 6019 }, { "epoch": 0.77, "grad_norm": 0.7353649761204798, "learning_rate": 7.065893746399945e-06, "loss": 0.6082, "step": 6020 }, { "epoch": 0.77, "grad_norm": 0.8206904928576645, "learning_rate": 7.0649542885097464e-06, "loss": 0.6601, "step": 6021 }, { "epoch": 0.77, "grad_norm": 0.7994782475497233, "learning_rate": 7.064014742721043e-06, "loss": 0.5921, "step": 6022 }, { "epoch": 0.77, "grad_norm": 0.6300920174866359, "learning_rate": 7.063075109073827e-06, "loss": 0.5324, "step": 6023 }, { "epoch": 0.77, "grad_norm": 0.7172389415559045, "learning_rate": 7.062135387608095e-06, "loss": 0.5691, "step": 6024 }, { "epoch": 0.77, "grad_norm": 0.6011265374447058, "learning_rate": 7.0611955783638516e-06, "loss": 0.5494, "step": 6025 }, { "epoch": 0.77, "grad_norm": 0.7913267524061012, "learning_rate": 7.060255681381098e-06, "loss": 0.5861, "step": 6026 }, { "epoch": 0.77, "grad_norm": 0.5408541454873202, "learning_rate": 7.0593156966998445e-06, "loss": 0.4974, "step": 6027 }, { "epoch": 0.77, "grad_norm": 0.7635874309264382, "learning_rate": 7.058375624360102e-06, "loss": 0.6004, "step": 6028 }, { "epoch": 0.77, "grad_norm": 0.5356125016899917, "learning_rate": 7.057435464401887e-06, "loss": 0.5235, "step": 6029 }, { "epoch": 0.77, "grad_norm": 0.626775555298914, "learning_rate": 7.056495216865219e-06, "loss": 0.5085, "step": 6030 }, { "epoch": 0.77, "grad_norm": 0.8547834497440239, "learning_rate": 7.055554881790121e-06, "loss": 0.6407, "step": 6031 }, { "epoch": 0.77, "grad_norm": 0.7310096864764011, "learning_rate": 7.054614459216619e-06, "loss": 0.6107, "step": 6032 }, { "epoch": 0.77, "grad_norm": 0.5757760455675806, "learning_rate": 7.0536739491847465e-06, "loss": 0.5305, "step": 6033 }, { "epoch": 0.77, "grad_norm": 0.7645665550905649, "learning_rate": 7.052733351734537e-06, "loss": 0.6176, "step": 6034 }, { "epoch": 0.77, "grad_norm": 0.8265033598092296, "learning_rate": 7.051792666906027e-06, "loss": 0.6061, "step": 6035 }, { "epoch": 0.77, "grad_norm": 0.6837039457234325, "learning_rate": 7.0508518947392614e-06, "loss": 0.5634, "step": 6036 }, { "epoch": 0.77, "grad_norm": 0.586998764721055, "learning_rate": 7.049911035274283e-06, "loss": 0.5124, "step": 6037 }, { "epoch": 0.77, "grad_norm": 0.5892951271709775, "learning_rate": 7.048970088551143e-06, "loss": 0.517, "step": 6038 }, { "epoch": 0.77, "grad_norm": 0.6512704912070886, "learning_rate": 7.048029054609893e-06, "loss": 0.5707, "step": 6039 }, { "epoch": 0.77, "grad_norm": 0.7442628176487647, "learning_rate": 7.047087933490591e-06, "loss": 0.597, "step": 6040 }, { "epoch": 0.77, "grad_norm": 0.6634565714644403, "learning_rate": 7.0461467252332984e-06, "loss": 0.4795, "step": 6041 }, { "epoch": 0.77, "grad_norm": 0.7873848421662484, "learning_rate": 7.045205429878076e-06, "loss": 0.6211, "step": 6042 }, { "epoch": 0.77, "grad_norm": 0.6345169378203459, "learning_rate": 7.044264047464995e-06, "loss": 0.4902, "step": 6043 }, { "epoch": 0.77, "grad_norm": 0.6048493037612452, "learning_rate": 7.0433225780341265e-06, "loss": 0.5271, "step": 6044 }, { "epoch": 0.77, "grad_norm": 0.5987685909027206, "learning_rate": 7.042381021625546e-06, "loss": 0.5846, "step": 6045 }, { "epoch": 0.77, "grad_norm": 0.761014568982348, "learning_rate": 7.0414393782793315e-06, "loss": 0.5534, "step": 6046 }, { "epoch": 0.77, "grad_norm": 0.5899132072894913, "learning_rate": 7.040497648035565e-06, "loss": 0.5201, "step": 6047 }, { "epoch": 0.77, "grad_norm": 0.7087852389345419, "learning_rate": 7.0395558309343345e-06, "loss": 0.6047, "step": 6048 }, { "epoch": 0.77, "grad_norm": 0.877431540022819, "learning_rate": 7.03861392701573e-06, "loss": 0.6155, "step": 6049 }, { "epoch": 0.77, "grad_norm": 0.5516692086699265, "learning_rate": 7.037671936319845e-06, "loss": 0.4954, "step": 6050 }, { "epoch": 0.77, "grad_norm": 0.5984557294666936, "learning_rate": 7.036729858886779e-06, "loss": 0.5467, "step": 6051 }, { "epoch": 0.77, "grad_norm": 0.5747723406091592, "learning_rate": 7.035787694756629e-06, "loss": 0.547, "step": 6052 }, { "epoch": 0.77, "grad_norm": 0.6071889743209812, "learning_rate": 7.0348454439695035e-06, "loss": 0.5746, "step": 6053 }, { "epoch": 0.77, "grad_norm": 0.7304023891510193, "learning_rate": 7.033903106565509e-06, "loss": 0.6284, "step": 6054 }, { "epoch": 0.77, "grad_norm": 1.6570541196986908, "learning_rate": 7.032960682584758e-06, "loss": 0.6438, "step": 6055 }, { "epoch": 0.77, "grad_norm": 0.5953741344451474, "learning_rate": 7.03201817206737e-06, "loss": 0.5526, "step": 6056 }, { "epoch": 0.77, "grad_norm": 0.7390360241547438, "learning_rate": 7.031075575053459e-06, "loss": 0.6218, "step": 6057 }, { "epoch": 0.77, "grad_norm": 0.6182872596546606, "learning_rate": 7.030132891583152e-06, "loss": 0.55, "step": 6058 }, { "epoch": 0.77, "grad_norm": 0.5474958207745392, "learning_rate": 7.029190121696577e-06, "loss": 0.496, "step": 6059 }, { "epoch": 0.77, "grad_norm": 0.8319296740253522, "learning_rate": 7.028247265433861e-06, "loss": 0.6289, "step": 6060 }, { "epoch": 0.77, "grad_norm": 0.6632324180366329, "learning_rate": 7.0273043228351404e-06, "loss": 0.6, "step": 6061 }, { "epoch": 0.77, "grad_norm": 0.7093942999794963, "learning_rate": 7.0263612939405524e-06, "loss": 0.568, "step": 6062 }, { "epoch": 0.77, "grad_norm": 0.672908521751254, "learning_rate": 7.025418178790241e-06, "loss": 0.5213, "step": 6063 }, { "epoch": 0.77, "grad_norm": 0.880776464295765, "learning_rate": 7.0244749774243495e-06, "loss": 0.5648, "step": 6064 }, { "epoch": 0.77, "grad_norm": 0.6353285734379588, "learning_rate": 7.0235316898830274e-06, "loss": 0.5592, "step": 6065 }, { "epoch": 0.77, "grad_norm": 0.80600462935185, "learning_rate": 7.022588316206427e-06, "loss": 0.664, "step": 6066 }, { "epoch": 0.77, "grad_norm": 0.6758317174509741, "learning_rate": 7.021644856434705e-06, "loss": 0.5806, "step": 6067 }, { "epoch": 0.77, "grad_norm": 0.6523254610023184, "learning_rate": 7.0207013106080225e-06, "loss": 0.5572, "step": 6068 }, { "epoch": 0.77, "grad_norm": 0.5382601567959718, "learning_rate": 7.019757678766544e-06, "loss": 0.541, "step": 6069 }, { "epoch": 0.77, "grad_norm": 0.6813693905307426, "learning_rate": 7.018813960950432e-06, "loss": 0.5508, "step": 6070 }, { "epoch": 0.77, "grad_norm": 0.745718707449539, "learning_rate": 7.0178701571998644e-06, "loss": 0.5573, "step": 6071 }, { "epoch": 0.77, "grad_norm": 0.6610284907289825, "learning_rate": 7.01692626755501e-06, "loss": 0.5677, "step": 6072 }, { "epoch": 0.77, "grad_norm": 0.7525283827944745, "learning_rate": 7.01598229205605e-06, "loss": 0.5415, "step": 6073 }, { "epoch": 0.77, "grad_norm": 0.6812398425879042, "learning_rate": 7.015038230743165e-06, "loss": 0.5766, "step": 6074 }, { "epoch": 0.77, "grad_norm": 0.6376153279099177, "learning_rate": 7.014094083656544e-06, "loss": 0.5339, "step": 6075 }, { "epoch": 0.77, "grad_norm": 0.6617176435761051, "learning_rate": 7.013149850836373e-06, "loss": 0.5468, "step": 6076 }, { "epoch": 0.77, "grad_norm": 0.612352892101539, "learning_rate": 7.0122055323228455e-06, "loss": 0.5113, "step": 6077 }, { "epoch": 0.77, "grad_norm": 0.5283091130467976, "learning_rate": 7.0112611281561595e-06, "loss": 0.494, "step": 6078 }, { "epoch": 0.77, "grad_norm": 0.6262043520584162, "learning_rate": 7.010316638376513e-06, "loss": 0.5996, "step": 6079 }, { "epoch": 0.77, "grad_norm": 0.6629502911278257, "learning_rate": 7.009372063024111e-06, "loss": 0.5422, "step": 6080 }, { "epoch": 0.77, "grad_norm": 0.7604354893945472, "learning_rate": 7.008427402139163e-06, "loss": 0.6251, "step": 6081 }, { "epoch": 0.77, "grad_norm": 0.7047594686504335, "learning_rate": 7.007482655761878e-06, "loss": 0.5861, "step": 6082 }, { "epoch": 0.77, "grad_norm": 0.8143829922209819, "learning_rate": 7.006537823932471e-06, "loss": 0.601, "step": 6083 }, { "epoch": 0.78, "grad_norm": 0.5984028371930139, "learning_rate": 7.005592906691161e-06, "loss": 0.5225, "step": 6084 }, { "epoch": 0.78, "grad_norm": 0.8038181984577378, "learning_rate": 7.00464790407817e-06, "loss": 0.612, "step": 6085 }, { "epoch": 0.78, "grad_norm": 0.7853545575807502, "learning_rate": 7.003702816133724e-06, "loss": 0.5401, "step": 6086 }, { "epoch": 0.78, "grad_norm": 0.5447574378687547, "learning_rate": 7.002757642898052e-06, "loss": 0.5152, "step": 6087 }, { "epoch": 0.78, "grad_norm": 0.7508248008587622, "learning_rate": 7.001812384411386e-06, "loss": 0.5991, "step": 6088 }, { "epoch": 0.78, "grad_norm": 0.6071495688858949, "learning_rate": 7.000867040713964e-06, "loss": 0.5257, "step": 6089 }, { "epoch": 0.78, "grad_norm": 0.6580350397288754, "learning_rate": 6.999921611846027e-06, "loss": 0.5114, "step": 6090 }, { "epoch": 0.78, "grad_norm": 0.8296385344887232, "learning_rate": 6.9989760978478176e-06, "loss": 0.6806, "step": 6091 }, { "epoch": 0.78, "grad_norm": 0.7362209605189728, "learning_rate": 6.998030498759583e-06, "loss": 0.5777, "step": 6092 }, { "epoch": 0.78, "grad_norm": 0.6859307273497655, "learning_rate": 6.9970848146215744e-06, "loss": 0.5421, "step": 6093 }, { "epoch": 0.78, "grad_norm": 0.5844085917491771, "learning_rate": 6.996139045474046e-06, "loss": 0.532, "step": 6094 }, { "epoch": 0.78, "grad_norm": 0.6592499049348985, "learning_rate": 6.995193191357259e-06, "loss": 0.563, "step": 6095 }, { "epoch": 0.78, "grad_norm": 0.5713688234567691, "learning_rate": 6.994247252311473e-06, "loss": 0.5138, "step": 6096 }, { "epoch": 0.78, "grad_norm": 0.6933565529333269, "learning_rate": 6.993301228376955e-06, "loss": 0.5335, "step": 6097 }, { "epoch": 0.78, "grad_norm": 0.9250161177146866, "learning_rate": 6.992355119593973e-06, "loss": 0.622, "step": 6098 }, { "epoch": 0.78, "grad_norm": 0.7224784839760994, "learning_rate": 6.9914089260028e-06, "loss": 0.5998, "step": 6099 }, { "epoch": 0.78, "grad_norm": 0.774091694077413, "learning_rate": 6.990462647643713e-06, "loss": 0.6345, "step": 6100 }, { "epoch": 0.78, "grad_norm": 0.5607801855546023, "learning_rate": 6.989516284556992e-06, "loss": 0.4911, "step": 6101 }, { "epoch": 0.78, "grad_norm": 0.7074015476510126, "learning_rate": 6.988569836782919e-06, "loss": 0.4826, "step": 6102 }, { "epoch": 0.78, "grad_norm": 0.8538333881835464, "learning_rate": 6.9876233043617835e-06, "loss": 0.6135, "step": 6103 }, { "epoch": 0.78, "grad_norm": 0.6198661983849384, "learning_rate": 6.986676687333874e-06, "loss": 0.5423, "step": 6104 }, { "epoch": 0.78, "grad_norm": 0.6908445499513478, "learning_rate": 6.985729985739489e-06, "loss": 0.526, "step": 6105 }, { "epoch": 0.78, "grad_norm": 0.6352547873271959, "learning_rate": 6.984783199618922e-06, "loss": 0.6285, "step": 6106 }, { "epoch": 0.78, "grad_norm": 0.5914740537912223, "learning_rate": 6.9838363290124776e-06, "loss": 0.5254, "step": 6107 }, { "epoch": 0.78, "grad_norm": 1.2801019088613952, "learning_rate": 6.98288937396046e-06, "loss": 0.6288, "step": 6108 }, { "epoch": 0.78, "grad_norm": 0.9335250808678409, "learning_rate": 6.981942334503178e-06, "loss": 0.6318, "step": 6109 }, { "epoch": 0.78, "grad_norm": 0.5849128006547969, "learning_rate": 6.980995210680943e-06, "loss": 0.5099, "step": 6110 }, { "epoch": 0.78, "grad_norm": 0.5681560596857154, "learning_rate": 6.980048002534074e-06, "loss": 0.5909, "step": 6111 }, { "epoch": 0.78, "grad_norm": 0.5871495885697342, "learning_rate": 6.979100710102888e-06, "loss": 0.485, "step": 6112 }, { "epoch": 0.78, "grad_norm": 0.8050561993364773, "learning_rate": 6.97815333342771e-06, "loss": 0.6033, "step": 6113 }, { "epoch": 0.78, "grad_norm": 0.7343342902711865, "learning_rate": 6.9772058725488655e-06, "loss": 0.6465, "step": 6114 }, { "epoch": 0.78, "grad_norm": 0.6408911840395797, "learning_rate": 6.9762583275066855e-06, "loss": 0.5082, "step": 6115 }, { "epoch": 0.78, "grad_norm": 0.7044054058239293, "learning_rate": 6.975310698341503e-06, "loss": 0.5339, "step": 6116 }, { "epoch": 0.78, "grad_norm": 0.5868739593970147, "learning_rate": 6.974362985093656e-06, "loss": 0.5566, "step": 6117 }, { "epoch": 0.78, "grad_norm": 0.6440622083548919, "learning_rate": 6.973415187803486e-06, "loss": 0.5513, "step": 6118 }, { "epoch": 0.78, "grad_norm": 0.7066928954633134, "learning_rate": 6.972467306511338e-06, "loss": 0.5257, "step": 6119 }, { "epoch": 0.78, "grad_norm": 0.7700288467201174, "learning_rate": 6.97151934125756e-06, "loss": 0.5991, "step": 6120 }, { "epoch": 0.78, "grad_norm": 0.5283508544117611, "learning_rate": 6.970571292082504e-06, "loss": 0.5175, "step": 6121 }, { "epoch": 0.78, "grad_norm": 0.8470524703983034, "learning_rate": 6.969623159026524e-06, "loss": 0.6532, "step": 6122 }, { "epoch": 0.78, "grad_norm": 0.6218391939761694, "learning_rate": 6.96867494212998e-06, "loss": 0.5331, "step": 6123 }, { "epoch": 0.78, "grad_norm": 0.7473198316412153, "learning_rate": 6.967726641433236e-06, "loss": 0.6415, "step": 6124 }, { "epoch": 0.78, "grad_norm": 0.5365926497601979, "learning_rate": 6.966778256976656e-06, "loss": 0.4869, "step": 6125 }, { "epoch": 0.78, "grad_norm": 0.914592815355564, "learning_rate": 6.965829788800611e-06, "loss": 0.6547, "step": 6126 }, { "epoch": 0.78, "grad_norm": 0.6601173258279925, "learning_rate": 6.964881236945474e-06, "loss": 0.5607, "step": 6127 }, { "epoch": 0.78, "grad_norm": 0.6843616194760515, "learning_rate": 6.963932601451621e-06, "loss": 0.5407, "step": 6128 }, { "epoch": 0.78, "grad_norm": 0.7183738774638622, "learning_rate": 6.962983882359433e-06, "loss": 0.6158, "step": 6129 }, { "epoch": 0.78, "grad_norm": 0.7680164649880659, "learning_rate": 6.962035079709294e-06, "loss": 0.6409, "step": 6130 }, { "epoch": 0.78, "grad_norm": 0.6684581297969855, "learning_rate": 6.9610861935415915e-06, "loss": 0.5631, "step": 6131 }, { "epoch": 0.78, "grad_norm": 0.8363427677623987, "learning_rate": 6.960137223896717e-06, "loss": 0.6002, "step": 6132 }, { "epoch": 0.78, "grad_norm": 0.7994319823281306, "learning_rate": 6.9591881708150645e-06, "loss": 0.5981, "step": 6133 }, { "epoch": 0.78, "grad_norm": 0.6396602571762071, "learning_rate": 6.958239034337032e-06, "loss": 0.48, "step": 6134 }, { "epoch": 0.78, "grad_norm": 0.9312708990308008, "learning_rate": 6.95728981450302e-06, "loss": 0.5926, "step": 6135 }, { "epoch": 0.78, "grad_norm": 0.7540675079988468, "learning_rate": 6.956340511353436e-06, "loss": 0.6073, "step": 6136 }, { "epoch": 0.78, "grad_norm": 0.7620861994525625, "learning_rate": 6.95539112492869e-06, "loss": 0.5923, "step": 6137 }, { "epoch": 0.78, "grad_norm": 0.7773320261409922, "learning_rate": 6.95444165526919e-06, "loss": 0.6306, "step": 6138 }, { "epoch": 0.78, "grad_norm": 0.6345931819255022, "learning_rate": 6.953492102415355e-06, "loss": 0.497, "step": 6139 }, { "epoch": 0.78, "grad_norm": 0.7509745325030854, "learning_rate": 6.9525424664076046e-06, "loss": 0.6594, "step": 6140 }, { "epoch": 0.78, "grad_norm": 0.8573200746023567, "learning_rate": 6.9515927472863595e-06, "loss": 0.6061, "step": 6141 }, { "epoch": 0.78, "grad_norm": 0.6054241820101158, "learning_rate": 6.950642945092047e-06, "loss": 0.5233, "step": 6142 }, { "epoch": 0.78, "grad_norm": 0.8119505648001418, "learning_rate": 6.9496930598651e-06, "loss": 0.609, "step": 6143 }, { "epoch": 0.78, "grad_norm": 0.6494219303020818, "learning_rate": 6.948743091645949e-06, "loss": 0.5748, "step": 6144 }, { "epoch": 0.78, "grad_norm": 0.8789723812372376, "learning_rate": 6.9477930404750304e-06, "loss": 0.6145, "step": 6145 }, { "epoch": 0.78, "grad_norm": 0.6700853983739093, "learning_rate": 6.9468429063927875e-06, "loss": 0.5913, "step": 6146 }, { "epoch": 0.78, "grad_norm": 0.7035478550174823, "learning_rate": 6.945892689439664e-06, "loss": 0.5895, "step": 6147 }, { "epoch": 0.78, "grad_norm": 0.6381283890988907, "learning_rate": 6.9449423896561055e-06, "loss": 0.5454, "step": 6148 }, { "epoch": 0.78, "grad_norm": 0.6237204562565154, "learning_rate": 6.943992007082565e-06, "loss": 0.5893, "step": 6149 }, { "epoch": 0.78, "grad_norm": 0.8342605489853235, "learning_rate": 6.943041541759496e-06, "loss": 0.6199, "step": 6150 }, { "epoch": 0.78, "grad_norm": 0.6102616469181731, "learning_rate": 6.94209099372736e-06, "loss": 0.5517, "step": 6151 }, { "epoch": 0.78, "grad_norm": 0.9253336997123479, "learning_rate": 6.941140363026615e-06, "loss": 0.5974, "step": 6152 }, { "epoch": 0.78, "grad_norm": 0.525117978333921, "learning_rate": 6.940189649697728e-06, "loss": 0.5245, "step": 6153 }, { "epoch": 0.78, "grad_norm": 0.6832617034871739, "learning_rate": 6.9392388537811675e-06, "loss": 0.5946, "step": 6154 }, { "epoch": 0.78, "grad_norm": 0.5714910527073446, "learning_rate": 6.938287975317406e-06, "loss": 0.5195, "step": 6155 }, { "epoch": 0.78, "grad_norm": 0.6361896105627376, "learning_rate": 6.937337014346918e-06, "loss": 0.6188, "step": 6156 }, { "epoch": 0.78, "grad_norm": 0.7734582689465479, "learning_rate": 6.936385970910185e-06, "loss": 0.5521, "step": 6157 }, { "epoch": 0.78, "grad_norm": 0.7891876975678117, "learning_rate": 6.9354348450476894e-06, "loss": 0.6444, "step": 6158 }, { "epoch": 0.78, "grad_norm": 0.7203664824765719, "learning_rate": 6.934483636799918e-06, "loss": 0.6059, "step": 6159 }, { "epoch": 0.78, "grad_norm": 0.7088666589050271, "learning_rate": 6.933532346207359e-06, "loss": 0.565, "step": 6160 }, { "epoch": 0.78, "grad_norm": 1.0739967395205492, "learning_rate": 6.932580973310507e-06, "loss": 0.6433, "step": 6161 }, { "epoch": 0.79, "grad_norm": 0.6721416735590974, "learning_rate": 6.931629518149858e-06, "loss": 0.5417, "step": 6162 }, { "epoch": 0.79, "grad_norm": 0.8208870301459986, "learning_rate": 6.930677980765913e-06, "loss": 0.6081, "step": 6163 }, { "epoch": 0.79, "grad_norm": 0.7918140216290176, "learning_rate": 6.929726361199176e-06, "loss": 0.6054, "step": 6164 }, { "epoch": 0.79, "grad_norm": 0.6279839957932126, "learning_rate": 6.928774659490155e-06, "loss": 0.4815, "step": 6165 }, { "epoch": 0.79, "grad_norm": 0.5384804074980266, "learning_rate": 6.9278228756793594e-06, "loss": 0.4604, "step": 6166 }, { "epoch": 0.79, "grad_norm": 0.5964097962248348, "learning_rate": 6.9268710098073044e-06, "loss": 0.5459, "step": 6167 }, { "epoch": 0.79, "grad_norm": 0.5860460691839549, "learning_rate": 6.925919061914509e-06, "loss": 0.4896, "step": 6168 }, { "epoch": 0.79, "grad_norm": 0.6369287628579393, "learning_rate": 6.924967032041493e-06, "loss": 0.5444, "step": 6169 }, { "epoch": 0.79, "grad_norm": 0.5809937042979003, "learning_rate": 6.924014920228781e-06, "loss": 0.5216, "step": 6170 }, { "epoch": 0.79, "grad_norm": 0.799396344707945, "learning_rate": 6.923062726516902e-06, "loss": 0.6009, "step": 6171 }, { "epoch": 0.79, "grad_norm": 0.6579431336556247, "learning_rate": 6.922110450946389e-06, "loss": 0.5732, "step": 6172 }, { "epoch": 0.79, "grad_norm": 0.9682610077255045, "learning_rate": 6.921158093557776e-06, "loss": 0.6369, "step": 6173 }, { "epoch": 0.79, "grad_norm": 0.7393093470537417, "learning_rate": 6.920205654391602e-06, "loss": 0.6148, "step": 6174 }, { "epoch": 0.79, "grad_norm": 0.7832586780668236, "learning_rate": 6.9192531334884095e-06, "loss": 0.6525, "step": 6175 }, { "epoch": 0.79, "grad_norm": 0.7039747573022053, "learning_rate": 6.918300530888744e-06, "loss": 0.5907, "step": 6176 }, { "epoch": 0.79, "grad_norm": 0.5574547025823923, "learning_rate": 6.917347846633155e-06, "loss": 0.5281, "step": 6177 }, { "epoch": 0.79, "grad_norm": 0.684420410474668, "learning_rate": 6.916395080762196e-06, "loss": 0.6124, "step": 6178 }, { "epoch": 0.79, "grad_norm": 0.6630924917107729, "learning_rate": 6.91544223331642e-06, "loss": 0.5838, "step": 6179 }, { "epoch": 0.79, "grad_norm": 1.0094688674188406, "learning_rate": 6.914489304336391e-06, "loss": 0.661, "step": 6180 }, { "epoch": 0.79, "grad_norm": 0.693860965646948, "learning_rate": 6.91353629386267e-06, "loss": 0.5885, "step": 6181 }, { "epoch": 0.79, "grad_norm": 0.6045233868854597, "learning_rate": 6.912583201935823e-06, "loss": 0.4921, "step": 6182 }, { "epoch": 0.79, "grad_norm": 0.5976208342295841, "learning_rate": 6.911630028596421e-06, "loss": 0.5161, "step": 6183 }, { "epoch": 0.79, "grad_norm": 0.6133173503245882, "learning_rate": 6.9106767738850386e-06, "loss": 0.4978, "step": 6184 }, { "epoch": 0.79, "grad_norm": 0.710504803794944, "learning_rate": 6.909723437842249e-06, "loss": 0.628, "step": 6185 }, { "epoch": 0.79, "grad_norm": 0.7743011024412121, "learning_rate": 6.908770020508637e-06, "loss": 0.6303, "step": 6186 }, { "epoch": 0.79, "grad_norm": 0.6856054433042441, "learning_rate": 6.907816521924785e-06, "loss": 0.557, "step": 6187 }, { "epoch": 0.79, "grad_norm": 0.781354247419235, "learning_rate": 6.90686294213128e-06, "loss": 0.5982, "step": 6188 }, { "epoch": 0.79, "grad_norm": 0.754877037475734, "learning_rate": 6.905909281168713e-06, "loss": 0.6367, "step": 6189 }, { "epoch": 0.79, "grad_norm": 0.7514741435184724, "learning_rate": 6.904955539077678e-06, "loss": 0.5725, "step": 6190 }, { "epoch": 0.79, "grad_norm": 0.8216087262393915, "learning_rate": 6.904001715898773e-06, "loss": 0.6266, "step": 6191 }, { "epoch": 0.79, "grad_norm": 0.7563601759227156, "learning_rate": 6.903047811672599e-06, "loss": 0.5646, "step": 6192 }, { "epoch": 0.79, "grad_norm": 0.6395881979251259, "learning_rate": 6.9020938264397615e-06, "loss": 0.4992, "step": 6193 }, { "epoch": 0.79, "grad_norm": 0.5733684592744743, "learning_rate": 6.901139760240868e-06, "loss": 0.4656, "step": 6194 }, { "epoch": 0.79, "grad_norm": 0.5579104745176718, "learning_rate": 6.90018561311653e-06, "loss": 0.529, "step": 6195 }, { "epoch": 0.79, "grad_norm": 0.8436171976687403, "learning_rate": 6.899231385107364e-06, "loss": 0.619, "step": 6196 }, { "epoch": 0.79, "grad_norm": 0.5820596508606891, "learning_rate": 6.8982770762539845e-06, "loss": 0.5333, "step": 6197 }, { "epoch": 0.79, "grad_norm": 0.8198069024579641, "learning_rate": 6.897322686597016e-06, "loss": 0.5821, "step": 6198 }, { "epoch": 0.79, "grad_norm": 0.5074964758574475, "learning_rate": 6.8963682161770854e-06, "loss": 0.4855, "step": 6199 }, { "epoch": 0.79, "grad_norm": 0.5053268102071425, "learning_rate": 6.895413665034819e-06, "loss": 0.461, "step": 6200 }, { "epoch": 0.79, "grad_norm": 0.7541032712826765, "learning_rate": 6.89445903321085e-06, "loss": 0.577, "step": 6201 }, { "epoch": 0.79, "grad_norm": 0.7463920325018282, "learning_rate": 6.893504320745814e-06, "loss": 0.6272, "step": 6202 }, { "epoch": 0.79, "grad_norm": 0.6227189089476501, "learning_rate": 6.892549527680348e-06, "loss": 0.5534, "step": 6203 }, { "epoch": 0.79, "grad_norm": 0.6424807333780551, "learning_rate": 6.891594654055098e-06, "loss": 0.5329, "step": 6204 }, { "epoch": 0.79, "grad_norm": 0.7734804845782416, "learning_rate": 6.8906396999107085e-06, "loss": 0.5427, "step": 6205 }, { "epoch": 0.79, "grad_norm": 0.742196668913134, "learning_rate": 6.889684665287828e-06, "loss": 0.6229, "step": 6206 }, { "epoch": 0.79, "grad_norm": 0.8613033288837254, "learning_rate": 6.88872955022711e-06, "loss": 0.679, "step": 6207 }, { "epoch": 0.79, "grad_norm": 0.6666564855842763, "learning_rate": 6.88777435476921e-06, "loss": 0.5233, "step": 6208 }, { "epoch": 0.79, "grad_norm": 0.8033239955118487, "learning_rate": 6.88681907895479e-06, "loss": 0.6039, "step": 6209 }, { "epoch": 0.79, "grad_norm": 0.7021445627143932, "learning_rate": 6.8858637228245086e-06, "loss": 0.6238, "step": 6210 }, { "epoch": 0.79, "grad_norm": 0.6665255342789965, "learning_rate": 6.884908286419036e-06, "loss": 0.5371, "step": 6211 }, { "epoch": 0.79, "grad_norm": 0.5943111470724661, "learning_rate": 6.8839527697790405e-06, "loss": 0.5069, "step": 6212 }, { "epoch": 0.79, "grad_norm": 0.5626700224449825, "learning_rate": 6.882997172945198e-06, "loss": 0.509, "step": 6213 }, { "epoch": 0.79, "grad_norm": 0.7003198306754069, "learning_rate": 6.882041495958181e-06, "loss": 0.4998, "step": 6214 }, { "epoch": 0.79, "grad_norm": 0.5864565904943859, "learning_rate": 6.881085738858673e-06, "loss": 0.513, "step": 6215 }, { "epoch": 0.79, "grad_norm": 0.5634487255844925, "learning_rate": 6.880129901687356e-06, "loss": 0.4916, "step": 6216 }, { "epoch": 0.79, "grad_norm": 0.5806470534231932, "learning_rate": 6.879173984484915e-06, "loss": 0.5266, "step": 6217 }, { "epoch": 0.79, "grad_norm": 0.7388452903608196, "learning_rate": 6.878217987292044e-06, "loss": 0.626, "step": 6218 }, { "epoch": 0.79, "grad_norm": 0.7120962507420958, "learning_rate": 6.8772619101494356e-06, "loss": 0.545, "step": 6219 }, { "epoch": 0.79, "grad_norm": 0.5667017214524706, "learning_rate": 6.876305753097786e-06, "loss": 0.4413, "step": 6220 }, { "epoch": 0.79, "grad_norm": 0.9126339519742783, "learning_rate": 6.875349516177796e-06, "loss": 0.607, "step": 6221 }, { "epoch": 0.79, "grad_norm": 0.6508330591251228, "learning_rate": 6.87439319943017e-06, "loss": 0.5155, "step": 6222 }, { "epoch": 0.79, "grad_norm": 0.559887847798657, "learning_rate": 6.873436802895617e-06, "loss": 0.5099, "step": 6223 }, { "epoch": 0.79, "grad_norm": 0.7936099331457016, "learning_rate": 6.872480326614844e-06, "loss": 0.5903, "step": 6224 }, { "epoch": 0.79, "grad_norm": 0.610167109177135, "learning_rate": 6.871523770628568e-06, "loss": 0.5426, "step": 6225 }, { "epoch": 0.79, "grad_norm": 0.8166665705034846, "learning_rate": 6.870567134977505e-06, "loss": 0.6252, "step": 6226 }, { "epoch": 0.79, "grad_norm": 0.7287601280290925, "learning_rate": 6.8696104197023776e-06, "loss": 0.6008, "step": 6227 }, { "epoch": 0.79, "grad_norm": 0.601670641793849, "learning_rate": 6.868653624843906e-06, "loss": 0.5068, "step": 6228 }, { "epoch": 0.79, "grad_norm": 0.7617357785708877, "learning_rate": 6.8676967504428235e-06, "loss": 0.61, "step": 6229 }, { "epoch": 0.79, "grad_norm": 0.7307420799092397, "learning_rate": 6.866739796539859e-06, "loss": 0.6041, "step": 6230 }, { "epoch": 0.79, "grad_norm": 0.5876454093073531, "learning_rate": 6.865782763175746e-06, "loss": 0.4629, "step": 6231 }, { "epoch": 0.79, "grad_norm": 0.7987521085472857, "learning_rate": 6.864825650391223e-06, "loss": 0.6566, "step": 6232 }, { "epoch": 0.79, "grad_norm": 0.7055230017174057, "learning_rate": 6.863868458227031e-06, "loss": 0.5847, "step": 6233 }, { "epoch": 0.79, "grad_norm": 0.7088863941530662, "learning_rate": 6.862911186723914e-06, "loss": 0.5952, "step": 6234 }, { "epoch": 0.79, "grad_norm": 0.7315691821038331, "learning_rate": 6.861953835922621e-06, "loss": 0.6058, "step": 6235 }, { "epoch": 0.79, "grad_norm": 0.7202830970738314, "learning_rate": 6.860996405863905e-06, "loss": 0.5745, "step": 6236 }, { "epoch": 0.79, "grad_norm": 0.7226304380598846, "learning_rate": 6.860038896588517e-06, "loss": 0.5845, "step": 6237 }, { "epoch": 0.79, "grad_norm": 0.8105305245141821, "learning_rate": 6.859081308137217e-06, "loss": 0.6127, "step": 6238 }, { "epoch": 0.79, "grad_norm": 0.8045750980169796, "learning_rate": 6.858123640550768e-06, "loss": 0.5539, "step": 6239 }, { "epoch": 0.79, "grad_norm": 0.6142025774557623, "learning_rate": 6.8571658938699325e-06, "loss": 0.5098, "step": 6240 }, { "epoch": 0.8, "grad_norm": 0.7187259568110902, "learning_rate": 6.856208068135479e-06, "loss": 0.6084, "step": 6241 }, { "epoch": 0.8, "grad_norm": 0.7185962916088359, "learning_rate": 6.855250163388181e-06, "loss": 0.5968, "step": 6242 }, { "epoch": 0.8, "grad_norm": 0.8012193986950965, "learning_rate": 6.854292179668811e-06, "loss": 0.5973, "step": 6243 }, { "epoch": 0.8, "grad_norm": 0.6125550453861028, "learning_rate": 6.853334117018149e-06, "loss": 0.4825, "step": 6244 }, { "epoch": 0.8, "grad_norm": 0.749544309407826, "learning_rate": 6.852375975476975e-06, "loss": 0.5419, "step": 6245 }, { "epoch": 0.8, "grad_norm": 0.5020314314941644, "learning_rate": 6.851417755086076e-06, "loss": 0.4914, "step": 6246 }, { "epoch": 0.8, "grad_norm": 0.7697754095881776, "learning_rate": 6.850459455886238e-06, "loss": 0.6191, "step": 6247 }, { "epoch": 0.8, "grad_norm": 0.5241607131772357, "learning_rate": 6.8495010779182555e-06, "loss": 0.5187, "step": 6248 }, { "epoch": 0.8, "grad_norm": 0.5733990169862537, "learning_rate": 6.848542621222922e-06, "loss": 0.5339, "step": 6249 }, { "epoch": 0.8, "grad_norm": 0.9785630042083076, "learning_rate": 6.847584085841037e-06, "loss": 0.6041, "step": 6250 }, { "epoch": 0.8, "grad_norm": 0.6649692871869353, "learning_rate": 6.846625471813402e-06, "loss": 0.5882, "step": 6251 }, { "epoch": 0.8, "grad_norm": 0.8112827043327446, "learning_rate": 6.84566677918082e-06, "loss": 0.6561, "step": 6252 }, { "epoch": 0.8, "grad_norm": 0.7365231857471037, "learning_rate": 6.8447080079841e-06, "loss": 0.5633, "step": 6253 }, { "epoch": 0.8, "grad_norm": 0.5696004897404208, "learning_rate": 6.843749158264057e-06, "loss": 0.5103, "step": 6254 }, { "epoch": 0.8, "grad_norm": 0.6108554199444037, "learning_rate": 6.842790230061504e-06, "loss": 0.5826, "step": 6255 }, { "epoch": 0.8, "grad_norm": 0.6850630542323604, "learning_rate": 6.841831223417259e-06, "loss": 0.5647, "step": 6256 }, { "epoch": 0.8, "grad_norm": 0.7699240680149971, "learning_rate": 6.840872138372145e-06, "loss": 0.6505, "step": 6257 }, { "epoch": 0.8, "grad_norm": 0.5776650376463175, "learning_rate": 6.8399129749669866e-06, "loss": 0.533, "step": 6258 }, { "epoch": 0.8, "grad_norm": 0.5806014451399877, "learning_rate": 6.838953733242611e-06, "loss": 0.5068, "step": 6259 }, { "epoch": 0.8, "grad_norm": 0.6530780661175465, "learning_rate": 6.837994413239854e-06, "loss": 0.5724, "step": 6260 }, { "epoch": 0.8, "grad_norm": 0.8350028438034934, "learning_rate": 6.837035014999546e-06, "loss": 0.5839, "step": 6261 }, { "epoch": 0.8, "grad_norm": 0.7615034528067307, "learning_rate": 6.836075538562529e-06, "loss": 0.602, "step": 6262 }, { "epoch": 0.8, "grad_norm": 0.5822667151176976, "learning_rate": 6.8351159839696436e-06, "loss": 0.5173, "step": 6263 }, { "epoch": 0.8, "grad_norm": 0.755752935684026, "learning_rate": 6.8341563512617335e-06, "loss": 0.5996, "step": 6264 }, { "epoch": 0.8, "grad_norm": 0.7101963630379892, "learning_rate": 6.833196640479649e-06, "loss": 0.5841, "step": 6265 }, { "epoch": 0.8, "grad_norm": 0.6852151021143676, "learning_rate": 6.832236851664242e-06, "loss": 0.5554, "step": 6266 }, { "epoch": 0.8, "grad_norm": 0.6015082261462827, "learning_rate": 6.831276984856369e-06, "loss": 0.497, "step": 6267 }, { "epoch": 0.8, "grad_norm": 0.6176942236606779, "learning_rate": 6.830317040096886e-06, "loss": 0.4986, "step": 6268 }, { "epoch": 0.8, "grad_norm": 0.7603440133190384, "learning_rate": 6.8293570174266535e-06, "loss": 0.6027, "step": 6269 }, { "epoch": 0.8, "grad_norm": 0.637943257089538, "learning_rate": 6.828396916886539e-06, "loss": 0.5489, "step": 6270 }, { "epoch": 0.8, "grad_norm": 0.7500199822387734, "learning_rate": 6.827436738517412e-06, "loss": 0.5838, "step": 6271 }, { "epoch": 0.8, "grad_norm": 0.6582165205043784, "learning_rate": 6.8264764823601404e-06, "loss": 0.5221, "step": 6272 }, { "epoch": 0.8, "grad_norm": 0.7077572872091533, "learning_rate": 6.8255161484556045e-06, "loss": 0.586, "step": 6273 }, { "epoch": 0.8, "grad_norm": 0.7533088625713023, "learning_rate": 6.8245557368446776e-06, "loss": 0.6029, "step": 6274 }, { "epoch": 0.8, "grad_norm": 0.7037599100165665, "learning_rate": 6.823595247568244e-06, "loss": 0.5991, "step": 6275 }, { "epoch": 0.8, "grad_norm": 0.6571375999275945, "learning_rate": 6.8226346806671885e-06, "loss": 0.529, "step": 6276 }, { "epoch": 0.8, "grad_norm": 0.6469866110133251, "learning_rate": 6.821674036182399e-06, "loss": 0.5267, "step": 6277 }, { "epoch": 0.8, "grad_norm": 0.5644371973477075, "learning_rate": 6.820713314154765e-06, "loss": 0.5203, "step": 6278 }, { "epoch": 0.8, "grad_norm": 0.5959305779758147, "learning_rate": 6.819752514625184e-06, "loss": 0.5317, "step": 6279 }, { "epoch": 0.8, "grad_norm": 0.7102077211387456, "learning_rate": 6.8187916376345555e-06, "loss": 0.6044, "step": 6280 }, { "epoch": 0.8, "grad_norm": 0.5544538366449274, "learning_rate": 6.817830683223778e-06, "loss": 0.5082, "step": 6281 }, { "epoch": 0.8, "grad_norm": 0.8184035636411395, "learning_rate": 6.816869651433757e-06, "loss": 0.6817, "step": 6282 }, { "epoch": 0.8, "grad_norm": 0.7213404586909871, "learning_rate": 6.815908542305402e-06, "loss": 0.5216, "step": 6283 }, { "epoch": 0.8, "grad_norm": 0.7223103989692812, "learning_rate": 6.814947355879622e-06, "loss": 0.5678, "step": 6284 }, { "epoch": 0.8, "grad_norm": 0.7481573018533019, "learning_rate": 6.8139860921973335e-06, "loss": 0.5798, "step": 6285 }, { "epoch": 0.8, "grad_norm": 0.6012225613001245, "learning_rate": 6.813024751299453e-06, "loss": 0.5014, "step": 6286 }, { "epoch": 0.8, "grad_norm": 0.6577919408740096, "learning_rate": 6.812063333226903e-06, "loss": 0.5676, "step": 6287 }, { "epoch": 0.8, "grad_norm": 0.5780130804933448, "learning_rate": 6.8111018380206075e-06, "loss": 0.5257, "step": 6288 }, { "epoch": 0.8, "grad_norm": 0.6037215296113814, "learning_rate": 6.8101402657214934e-06, "loss": 0.5057, "step": 6289 }, { "epoch": 0.8, "grad_norm": 0.6517998028216473, "learning_rate": 6.809178616370492e-06, "loss": 0.5293, "step": 6290 }, { "epoch": 0.8, "grad_norm": 0.6471955927017133, "learning_rate": 6.8082168900085385e-06, "loss": 0.525, "step": 6291 }, { "epoch": 0.8, "grad_norm": 0.8668090658652021, "learning_rate": 6.807255086676572e-06, "loss": 0.6323, "step": 6292 }, { "epoch": 0.8, "grad_norm": 0.7283029204199846, "learning_rate": 6.80629320641553e-06, "loss": 0.6375, "step": 6293 }, { "epoch": 0.8, "grad_norm": 0.777387050154529, "learning_rate": 6.805331249266359e-06, "loss": 0.6458, "step": 6294 }, { "epoch": 0.8, "grad_norm": 0.6352214781589633, "learning_rate": 6.8043692152700056e-06, "loss": 0.5185, "step": 6295 }, { "epoch": 0.8, "grad_norm": 0.684343183397395, "learning_rate": 6.803407104467421e-06, "loss": 0.6893, "step": 6296 }, { "epoch": 0.8, "grad_norm": 0.7276600518156257, "learning_rate": 6.802444916899558e-06, "loss": 0.5878, "step": 6297 }, { "epoch": 0.8, "grad_norm": 0.6796942134435697, "learning_rate": 6.801482652607375e-06, "loss": 0.5382, "step": 6298 }, { "epoch": 0.8, "grad_norm": 0.6201585270623763, "learning_rate": 6.800520311631833e-06, "loss": 0.5295, "step": 6299 }, { "epoch": 0.8, "grad_norm": 0.7462001506166249, "learning_rate": 6.799557894013894e-06, "loss": 0.6015, "step": 6300 }, { "epoch": 0.8, "grad_norm": 0.6913324373138605, "learning_rate": 6.798595399794527e-06, "loss": 0.6534, "step": 6301 }, { "epoch": 0.8, "grad_norm": 0.8357334549675459, "learning_rate": 6.7976328290147e-06, "loss": 0.6185, "step": 6302 }, { "epoch": 0.8, "grad_norm": 0.546093348874749, "learning_rate": 6.796670181715388e-06, "loss": 0.474, "step": 6303 }, { "epoch": 0.8, "grad_norm": 0.7175699288559444, "learning_rate": 6.795707457937568e-06, "loss": 0.585, "step": 6304 }, { "epoch": 0.8, "grad_norm": 0.6450632005756834, "learning_rate": 6.794744657722221e-06, "loss": 0.5129, "step": 6305 }, { "epoch": 0.8, "grad_norm": 0.7124027743503827, "learning_rate": 6.793781781110328e-06, "loss": 0.6108, "step": 6306 }, { "epoch": 0.8, "grad_norm": 0.8872452299574621, "learning_rate": 6.792818828142876e-06, "loss": 0.6345, "step": 6307 }, { "epoch": 0.8, "grad_norm": 0.7730338132466026, "learning_rate": 6.791855798860857e-06, "loss": 0.5703, "step": 6308 }, { "epoch": 0.8, "grad_norm": 0.6589697691999539, "learning_rate": 6.790892693305261e-06, "loss": 0.5561, "step": 6309 }, { "epoch": 0.8, "grad_norm": 0.6191895272824437, "learning_rate": 6.789929511517087e-06, "loss": 0.5128, "step": 6310 }, { "epoch": 0.8, "grad_norm": 0.7340550573357525, "learning_rate": 6.788966253537333e-06, "loss": 0.6193, "step": 6311 }, { "epoch": 0.8, "grad_norm": 0.6602665420114553, "learning_rate": 6.788002919407003e-06, "loss": 0.4922, "step": 6312 }, { "epoch": 0.8, "grad_norm": 0.6551173853319442, "learning_rate": 6.7870395091671014e-06, "loss": 0.5302, "step": 6313 }, { "epoch": 0.8, "grad_norm": 0.8288366529652099, "learning_rate": 6.78607602285864e-06, "loss": 0.6447, "step": 6314 }, { "epoch": 0.8, "grad_norm": 0.8531539098497349, "learning_rate": 6.785112460522627e-06, "loss": 0.6653, "step": 6315 }, { "epoch": 0.8, "grad_norm": 0.7759389167907225, "learning_rate": 6.784148822200084e-06, "loss": 0.6719, "step": 6316 }, { "epoch": 0.8, "grad_norm": 0.6430947513586196, "learning_rate": 6.783185107932025e-06, "loss": 0.4969, "step": 6317 }, { "epoch": 0.8, "grad_norm": 0.6864824543720452, "learning_rate": 6.782221317759475e-06, "loss": 0.5667, "step": 6318 }, { "epoch": 0.81, "grad_norm": 0.5712421829392823, "learning_rate": 6.78125745172346e-06, "loss": 0.5097, "step": 6319 }, { "epoch": 0.81, "grad_norm": 0.9133388746218416, "learning_rate": 6.7802935098650055e-06, "loss": 0.6281, "step": 6320 }, { "epoch": 0.81, "grad_norm": 0.8147174738533315, "learning_rate": 6.779329492225144e-06, "loss": 0.6049, "step": 6321 }, { "epoch": 0.81, "grad_norm": 0.6203870899888008, "learning_rate": 6.778365398844915e-06, "loss": 0.5212, "step": 6322 }, { "epoch": 0.81, "grad_norm": 0.6220443670644604, "learning_rate": 6.7774012297653525e-06, "loss": 0.5344, "step": 6323 }, { "epoch": 0.81, "grad_norm": 0.7281004215424857, "learning_rate": 6.776436985027499e-06, "loss": 0.5908, "step": 6324 }, { "epoch": 0.81, "grad_norm": 0.766516496697392, "learning_rate": 6.7754726646724e-06, "loss": 0.5989, "step": 6325 }, { "epoch": 0.81, "grad_norm": 0.8734819449120286, "learning_rate": 6.774508268741105e-06, "loss": 0.5812, "step": 6326 }, { "epoch": 0.81, "grad_norm": 0.6165868605416652, "learning_rate": 6.77354379727466e-06, "loss": 0.556, "step": 6327 }, { "epoch": 0.81, "grad_norm": 0.7544123996488468, "learning_rate": 6.772579250314126e-06, "loss": 0.5535, "step": 6328 }, { "epoch": 0.81, "grad_norm": 0.6843396353768542, "learning_rate": 6.771614627900556e-06, "loss": 0.5076, "step": 6329 }, { "epoch": 0.81, "grad_norm": 0.7335055256580995, "learning_rate": 6.770649930075013e-06, "loss": 0.626, "step": 6330 }, { "epoch": 0.81, "grad_norm": 0.8637164624456584, "learning_rate": 6.76968515687856e-06, "loss": 0.6601, "step": 6331 }, { "epoch": 0.81, "grad_norm": 0.6383292611934841, "learning_rate": 6.768720308352267e-06, "loss": 0.5465, "step": 6332 }, { "epoch": 0.81, "grad_norm": 1.0248782307901643, "learning_rate": 6.767755384537202e-06, "loss": 0.621, "step": 6333 }, { "epoch": 0.81, "grad_norm": 0.779170201696406, "learning_rate": 6.766790385474436e-06, "loss": 0.6268, "step": 6334 }, { "epoch": 0.81, "grad_norm": 0.6401784606901068, "learning_rate": 6.765825311205052e-06, "loss": 0.5438, "step": 6335 }, { "epoch": 0.81, "grad_norm": 0.7786211418180801, "learning_rate": 6.764860161770128e-06, "loss": 0.6498, "step": 6336 }, { "epoch": 0.81, "grad_norm": 0.6726624664743807, "learning_rate": 6.7638949372107455e-06, "loss": 0.5716, "step": 6337 }, { "epoch": 0.81, "grad_norm": 0.6774421434867293, "learning_rate": 6.762929637567992e-06, "loss": 0.5998, "step": 6338 }, { "epoch": 0.81, "grad_norm": 0.5738097360089144, "learning_rate": 6.761964262882957e-06, "loss": 0.5201, "step": 6339 }, { "epoch": 0.81, "grad_norm": 0.578057836464607, "learning_rate": 6.760998813196735e-06, "loss": 0.5358, "step": 6340 }, { "epoch": 0.81, "grad_norm": 0.9411607403286338, "learning_rate": 6.760033288550419e-06, "loss": 0.6528, "step": 6341 }, { "epoch": 0.81, "grad_norm": 0.7901292814985689, "learning_rate": 6.759067688985111e-06, "loss": 0.6205, "step": 6342 }, { "epoch": 0.81, "grad_norm": 0.7859494662537901, "learning_rate": 6.758102014541914e-06, "loss": 0.6246, "step": 6343 }, { "epoch": 0.81, "grad_norm": 0.5360097350128014, "learning_rate": 6.757136265261931e-06, "loss": 0.4813, "step": 6344 }, { "epoch": 0.81, "grad_norm": 0.6384464118020083, "learning_rate": 6.756170441186273e-06, "loss": 0.575, "step": 6345 }, { "epoch": 0.81, "grad_norm": 0.7887056311658751, "learning_rate": 6.755204542356051e-06, "loss": 0.6061, "step": 6346 }, { "epoch": 0.81, "grad_norm": 0.7973621020106734, "learning_rate": 6.754238568812379e-06, "loss": 0.6175, "step": 6347 }, { "epoch": 0.81, "grad_norm": 0.5730633541510033, "learning_rate": 6.753272520596379e-06, "loss": 0.5138, "step": 6348 }, { "epoch": 0.81, "grad_norm": 0.647216245800709, "learning_rate": 6.752306397749168e-06, "loss": 0.5691, "step": 6349 }, { "epoch": 0.81, "grad_norm": 0.5954325889514445, "learning_rate": 6.751340200311875e-06, "loss": 0.5111, "step": 6350 }, { "epoch": 0.81, "grad_norm": 0.6657911872799888, "learning_rate": 6.7503739283256275e-06, "loss": 0.5294, "step": 6351 }, { "epoch": 0.81, "grad_norm": 0.5328784579864676, "learning_rate": 6.749407581831553e-06, "loss": 0.5258, "step": 6352 }, { "epoch": 0.81, "grad_norm": 0.8368523370346123, "learning_rate": 6.748441160870788e-06, "loss": 0.6113, "step": 6353 }, { "epoch": 0.81, "grad_norm": 0.5718359238750406, "learning_rate": 6.74747466548447e-06, "loss": 0.5504, "step": 6354 }, { "epoch": 0.81, "grad_norm": 0.7893892783212353, "learning_rate": 6.746508095713742e-06, "loss": 0.6742, "step": 6355 }, { "epoch": 0.81, "grad_norm": 0.6334354873352155, "learning_rate": 6.745541451599743e-06, "loss": 0.576, "step": 6356 }, { "epoch": 0.81, "grad_norm": 0.6671874039448502, "learning_rate": 6.7445747331836235e-06, "loss": 0.5022, "step": 6357 }, { "epoch": 0.81, "grad_norm": 0.5682849379380871, "learning_rate": 6.743607940506531e-06, "loss": 0.5301, "step": 6358 }, { "epoch": 0.81, "grad_norm": 0.6529245815969871, "learning_rate": 6.742641073609621e-06, "loss": 0.5456, "step": 6359 }, { "epoch": 0.81, "grad_norm": 0.5529149987318653, "learning_rate": 6.74167413253405e-06, "loss": 0.5154, "step": 6360 }, { "epoch": 0.81, "grad_norm": 0.6271718459165078, "learning_rate": 6.740707117320976e-06, "loss": 0.511, "step": 6361 }, { "epoch": 0.81, "grad_norm": 0.8792267567828937, "learning_rate": 6.739740028011564e-06, "loss": 0.6005, "step": 6362 }, { "epoch": 0.81, "grad_norm": 0.6326283319976258, "learning_rate": 6.738772864646976e-06, "loss": 0.5839, "step": 6363 }, { "epoch": 0.81, "grad_norm": 0.7789312932868144, "learning_rate": 6.737805627268385e-06, "loss": 0.5517, "step": 6364 }, { "epoch": 0.81, "grad_norm": 0.8044445950198794, "learning_rate": 6.73683831591696e-06, "loss": 0.6369, "step": 6365 }, { "epoch": 0.81, "grad_norm": 0.5872765022955363, "learning_rate": 6.73587093063388e-06, "loss": 0.5741, "step": 6366 }, { "epoch": 0.81, "grad_norm": 0.6077595396691541, "learning_rate": 6.734903471460321e-06, "loss": 0.5523, "step": 6367 }, { "epoch": 0.81, "grad_norm": 0.6037433871943244, "learning_rate": 6.733935938437466e-06, "loss": 0.5268, "step": 6368 }, { "epoch": 0.81, "grad_norm": 0.8342328555210821, "learning_rate": 6.732968331606498e-06, "loss": 0.6046, "step": 6369 }, { "epoch": 0.81, "grad_norm": 0.623905236072365, "learning_rate": 6.732000651008606e-06, "loss": 0.5542, "step": 6370 }, { "epoch": 0.81, "grad_norm": 0.5152553373721863, "learning_rate": 6.731032896684979e-06, "loss": 0.4848, "step": 6371 }, { "epoch": 0.81, "grad_norm": 0.6491968961014971, "learning_rate": 6.730065068676816e-06, "loss": 0.5371, "step": 6372 }, { "epoch": 0.81, "grad_norm": 0.6969049736847973, "learning_rate": 6.72909716702531e-06, "loss": 0.5595, "step": 6373 }, { "epoch": 0.81, "grad_norm": 0.5510741188143825, "learning_rate": 6.728129191771664e-06, "loss": 0.4857, "step": 6374 }, { "epoch": 0.81, "grad_norm": 0.7451068437304299, "learning_rate": 6.727161142957081e-06, "loss": 0.6152, "step": 6375 }, { "epoch": 0.81, "grad_norm": 0.6298200907591202, "learning_rate": 6.726193020622766e-06, "loss": 0.49, "step": 6376 }, { "epoch": 0.81, "grad_norm": 0.6542965102588401, "learning_rate": 6.72522482480993e-06, "loss": 0.5584, "step": 6377 }, { "epoch": 0.81, "grad_norm": 0.7509972788597397, "learning_rate": 6.724256555559787e-06, "loss": 0.6335, "step": 6378 }, { "epoch": 0.81, "grad_norm": 0.6254214657372537, "learning_rate": 6.723288212913553e-06, "loss": 0.5257, "step": 6379 }, { "epoch": 0.81, "grad_norm": 0.7173655135127083, "learning_rate": 6.722319796912446e-06, "loss": 0.5995, "step": 6380 }, { "epoch": 0.81, "grad_norm": 0.8969418779895143, "learning_rate": 6.721351307597689e-06, "loss": 0.6029, "step": 6381 }, { "epoch": 0.81, "grad_norm": 0.7638516192884462, "learning_rate": 6.720382745010507e-06, "loss": 0.5027, "step": 6382 }, { "epoch": 0.81, "grad_norm": 0.6582976704035635, "learning_rate": 6.71941410919213e-06, "loss": 0.4851, "step": 6383 }, { "epoch": 0.81, "grad_norm": 1.1897043957818842, "learning_rate": 6.718445400183789e-06, "loss": 0.6378, "step": 6384 }, { "epoch": 0.81, "grad_norm": 0.7208578914676788, "learning_rate": 6.717476618026717e-06, "loss": 0.5765, "step": 6385 }, { "epoch": 0.81, "grad_norm": 0.5721751519254793, "learning_rate": 6.716507762762155e-06, "loss": 0.4664, "step": 6386 }, { "epoch": 0.81, "grad_norm": 0.7802442509445575, "learning_rate": 6.7155388344313434e-06, "loss": 0.6487, "step": 6387 }, { "epoch": 0.81, "grad_norm": 0.882001307796168, "learning_rate": 6.714569833075524e-06, "loss": 0.6357, "step": 6388 }, { "epoch": 0.81, "grad_norm": 0.879967952624162, "learning_rate": 6.713600758735946e-06, "loss": 0.5818, "step": 6389 }, { "epoch": 0.81, "grad_norm": 0.5626014171327017, "learning_rate": 6.71263161145386e-06, "loss": 0.5003, "step": 6390 }, { "epoch": 0.81, "grad_norm": 0.7976670591117455, "learning_rate": 6.7116623912705196e-06, "loss": 0.6132, "step": 6391 }, { "epoch": 0.81, "grad_norm": 0.5724625636422039, "learning_rate": 6.710693098227181e-06, "loss": 0.4979, "step": 6392 }, { "epoch": 0.81, "grad_norm": 0.7429685766744689, "learning_rate": 6.709723732365103e-06, "loss": 0.5725, "step": 6393 }, { "epoch": 0.81, "grad_norm": 0.5828556645578636, "learning_rate": 6.70875429372555e-06, "loss": 0.5157, "step": 6394 }, { "epoch": 0.81, "grad_norm": 0.6320381571843299, "learning_rate": 6.707784782349787e-06, "loss": 0.5201, "step": 6395 }, { "epoch": 0.81, "grad_norm": 0.6159010265565266, "learning_rate": 6.706815198279082e-06, "loss": 0.5008, "step": 6396 }, { "epoch": 0.81, "grad_norm": 0.5737740421180026, "learning_rate": 6.7058455415547085e-06, "loss": 0.4834, "step": 6397 }, { "epoch": 0.82, "grad_norm": 0.5789861003246338, "learning_rate": 6.704875812217942e-06, "loss": 0.472, "step": 6398 }, { "epoch": 0.82, "grad_norm": 0.6760633839978232, "learning_rate": 6.7039060103100605e-06, "loss": 0.6028, "step": 6399 }, { "epoch": 0.82, "grad_norm": 0.7588558279356667, "learning_rate": 6.702936135872344e-06, "loss": 0.6653, "step": 6400 }, { "epoch": 0.82, "grad_norm": 0.5963435157206481, "learning_rate": 6.70196618894608e-06, "loss": 0.5475, "step": 6401 }, { "epoch": 0.82, "grad_norm": 0.736899258733208, "learning_rate": 6.700996169572553e-06, "loss": 0.5398, "step": 6402 }, { "epoch": 0.82, "grad_norm": 0.8047954247923363, "learning_rate": 6.700026077793052e-06, "loss": 0.6701, "step": 6403 }, { "epoch": 0.82, "grad_norm": 0.7811606989006268, "learning_rate": 6.699055913648877e-06, "loss": 0.5097, "step": 6404 }, { "epoch": 0.82, "grad_norm": 0.8208408967904837, "learning_rate": 6.69808567718132e-06, "loss": 0.6005, "step": 6405 }, { "epoch": 0.82, "grad_norm": 0.6951735888945008, "learning_rate": 6.6971153684316815e-06, "loss": 0.5977, "step": 6406 }, { "epoch": 0.82, "grad_norm": 0.7181001260605885, "learning_rate": 6.696144987441265e-06, "loss": 0.5459, "step": 6407 }, { "epoch": 0.82, "grad_norm": 1.2468873166070857, "learning_rate": 6.695174534251377e-06, "loss": 0.6572, "step": 6408 }, { "epoch": 0.82, "grad_norm": 0.7854443515098432, "learning_rate": 6.694204008903326e-06, "loss": 0.6118, "step": 6409 }, { "epoch": 0.82, "grad_norm": 0.5717858423249883, "learning_rate": 6.693233411438424e-06, "loss": 0.4987, "step": 6410 }, { "epoch": 0.82, "grad_norm": 0.7099312229948749, "learning_rate": 6.692262741897988e-06, "loss": 0.594, "step": 6411 }, { "epoch": 0.82, "grad_norm": 0.8691331635780105, "learning_rate": 6.691292000323333e-06, "loss": 0.6265, "step": 6412 }, { "epoch": 0.82, "grad_norm": 0.6657924821602142, "learning_rate": 6.690321186755783e-06, "loss": 0.4977, "step": 6413 }, { "epoch": 0.82, "grad_norm": 0.8928952064216477, "learning_rate": 6.68935030123666e-06, "loss": 0.5673, "step": 6414 }, { "epoch": 0.82, "grad_norm": 0.6431463191041895, "learning_rate": 6.688379343807294e-06, "loss": 0.5669, "step": 6415 }, { "epoch": 0.82, "grad_norm": 0.891696782672777, "learning_rate": 6.687408314509015e-06, "loss": 0.6501, "step": 6416 }, { "epoch": 0.82, "grad_norm": 0.7976500124486192, "learning_rate": 6.6864372133831565e-06, "loss": 0.5879, "step": 6417 }, { "epoch": 0.82, "grad_norm": 0.8207467405287292, "learning_rate": 6.685466040471055e-06, "loss": 0.6457, "step": 6418 }, { "epoch": 0.82, "grad_norm": 0.8132338002114149, "learning_rate": 6.6844947958140495e-06, "loss": 0.5782, "step": 6419 }, { "epoch": 0.82, "grad_norm": 0.5865131703071755, "learning_rate": 6.683523479453482e-06, "loss": 0.5329, "step": 6420 }, { "epoch": 0.82, "grad_norm": 0.7642471230476104, "learning_rate": 6.682552091430702e-06, "loss": 0.6505, "step": 6421 }, { "epoch": 0.82, "grad_norm": 0.6269724046919594, "learning_rate": 6.681580631787055e-06, "loss": 0.5537, "step": 6422 }, { "epoch": 0.82, "grad_norm": 0.787866405621647, "learning_rate": 6.680609100563895e-06, "loss": 0.6021, "step": 6423 }, { "epoch": 0.82, "grad_norm": 0.63042795399231, "learning_rate": 6.679637497802576e-06, "loss": 0.573, "step": 6424 }, { "epoch": 0.82, "grad_norm": 0.5839167676917506, "learning_rate": 6.6786658235444545e-06, "loss": 0.4792, "step": 6425 }, { "epoch": 0.82, "grad_norm": 0.8759054067242198, "learning_rate": 6.677694077830895e-06, "loss": 0.6558, "step": 6426 }, { "epoch": 0.82, "grad_norm": 0.6307238457096034, "learning_rate": 6.676722260703257e-06, "loss": 0.5357, "step": 6427 }, { "epoch": 0.82, "grad_norm": 0.9370918199493273, "learning_rate": 6.675750372202914e-06, "loss": 0.6828, "step": 6428 }, { "epoch": 0.82, "grad_norm": 0.8054200309083763, "learning_rate": 6.674778412371231e-06, "loss": 0.5958, "step": 6429 }, { "epoch": 0.82, "grad_norm": 0.7846811580356037, "learning_rate": 6.673806381249582e-06, "loss": 0.6207, "step": 6430 }, { "epoch": 0.82, "grad_norm": 2.5581675605256007, "learning_rate": 6.6728342788793455e-06, "loss": 0.5634, "step": 6431 }, { "epoch": 0.82, "grad_norm": 0.7101407156229939, "learning_rate": 6.671862105301898e-06, "loss": 0.5452, "step": 6432 }, { "epoch": 0.82, "grad_norm": 0.7614938372698999, "learning_rate": 6.670889860558623e-06, "loss": 0.4759, "step": 6433 }, { "epoch": 0.82, "grad_norm": 0.8168065779181795, "learning_rate": 6.669917544690908e-06, "loss": 0.5904, "step": 6434 }, { "epoch": 0.82, "grad_norm": 0.6750045326616718, "learning_rate": 6.668945157740139e-06, "loss": 0.4993, "step": 6435 }, { "epoch": 0.82, "grad_norm": 0.7930097513448877, "learning_rate": 6.667972699747707e-06, "loss": 0.6017, "step": 6436 }, { "epoch": 0.82, "grad_norm": 0.5277232096528562, "learning_rate": 6.667000170755007e-06, "loss": 0.4836, "step": 6437 }, { "epoch": 0.82, "grad_norm": 0.5934540690664346, "learning_rate": 6.666027570803437e-06, "loss": 0.544, "step": 6438 }, { "epoch": 0.82, "grad_norm": 0.5690663884221651, "learning_rate": 6.665054899934397e-06, "loss": 0.5077, "step": 6439 }, { "epoch": 0.82, "grad_norm": 0.6735525718196225, "learning_rate": 6.664082158189291e-06, "loss": 0.5782, "step": 6440 }, { "epoch": 0.82, "grad_norm": 0.7205738057595237, "learning_rate": 6.663109345609525e-06, "loss": 0.5601, "step": 6441 }, { "epoch": 0.82, "grad_norm": 0.7535096231438502, "learning_rate": 6.662136462236509e-06, "loss": 0.6243, "step": 6442 }, { "epoch": 0.82, "grad_norm": 0.6220989097344559, "learning_rate": 6.661163508111655e-06, "loss": 0.5077, "step": 6443 }, { "epoch": 0.82, "grad_norm": 0.7335877244020317, "learning_rate": 6.66019048327638e-06, "loss": 0.594, "step": 6444 }, { "epoch": 0.82, "grad_norm": 0.5847035984154572, "learning_rate": 6.659217387772099e-06, "loss": 0.5017, "step": 6445 }, { "epoch": 0.82, "grad_norm": 0.6953747917685108, "learning_rate": 6.658244221640238e-06, "loss": 0.5715, "step": 6446 }, { "epoch": 0.82, "grad_norm": 0.7203864369924682, "learning_rate": 6.657270984922217e-06, "loss": 0.5861, "step": 6447 }, { "epoch": 0.82, "grad_norm": 0.5187896783444225, "learning_rate": 6.6562976776594686e-06, "loss": 0.5062, "step": 6448 }, { "epoch": 0.82, "grad_norm": 0.7099647596179366, "learning_rate": 6.65532429989342e-06, "loss": 0.5598, "step": 6449 }, { "epoch": 0.82, "grad_norm": 0.7002824694552254, "learning_rate": 6.654350851665505e-06, "loss": 0.5226, "step": 6450 }, { "epoch": 0.82, "grad_norm": 0.721513877311857, "learning_rate": 6.65337733301716e-06, "loss": 0.546, "step": 6451 }, { "epoch": 0.82, "grad_norm": 0.5634849603066018, "learning_rate": 6.652403743989827e-06, "loss": 0.5039, "step": 6452 }, { "epoch": 0.82, "grad_norm": 0.569399846285577, "learning_rate": 6.651430084624947e-06, "loss": 0.4602, "step": 6453 }, { "epoch": 0.82, "grad_norm": 0.6769632673894282, "learning_rate": 6.650456354963966e-06, "loss": 0.5568, "step": 6454 }, { "epoch": 0.82, "grad_norm": 0.6914939967947034, "learning_rate": 6.64948255504833e-06, "loss": 0.5609, "step": 6455 }, { "epoch": 0.82, "grad_norm": 0.8084550059355045, "learning_rate": 6.648508684919495e-06, "loss": 0.6309, "step": 6456 }, { "epoch": 0.82, "grad_norm": 0.5736143024332176, "learning_rate": 6.647534744618911e-06, "loss": 0.4859, "step": 6457 }, { "epoch": 0.82, "grad_norm": 0.5459911748431158, "learning_rate": 6.646560734188039e-06, "loss": 0.5395, "step": 6458 }, { "epoch": 0.82, "grad_norm": 0.647497937146447, "learning_rate": 6.645586653668337e-06, "loss": 0.5455, "step": 6459 }, { "epoch": 0.82, "grad_norm": 0.7416950392981845, "learning_rate": 6.644612503101271e-06, "loss": 0.5421, "step": 6460 }, { "epoch": 0.82, "grad_norm": 0.769850560611796, "learning_rate": 6.643638282528306e-06, "loss": 0.6333, "step": 6461 }, { "epoch": 0.82, "grad_norm": 0.787546334563346, "learning_rate": 6.642663991990911e-06, "loss": 0.6094, "step": 6462 }, { "epoch": 0.82, "grad_norm": 0.6316002600992692, "learning_rate": 6.641689631530559e-06, "loss": 0.5062, "step": 6463 }, { "epoch": 0.82, "grad_norm": 0.7174694295250796, "learning_rate": 6.640715201188727e-06, "loss": 0.6388, "step": 6464 }, { "epoch": 0.82, "grad_norm": 0.7925445165429377, "learning_rate": 6.639740701006889e-06, "loss": 0.5913, "step": 6465 }, { "epoch": 0.82, "grad_norm": 0.7000180266583456, "learning_rate": 6.6387661310265315e-06, "loss": 0.622, "step": 6466 }, { "epoch": 0.82, "grad_norm": 0.6286676745558507, "learning_rate": 6.637791491289136e-06, "loss": 0.5497, "step": 6467 }, { "epoch": 0.82, "grad_norm": 0.764354709144767, "learning_rate": 6.63681678183619e-06, "loss": 0.5325, "step": 6468 }, { "epoch": 0.82, "grad_norm": 0.8300264652027384, "learning_rate": 6.635842002709185e-06, "loss": 0.5746, "step": 6469 }, { "epoch": 0.82, "grad_norm": 0.5822342238397752, "learning_rate": 6.634867153949613e-06, "loss": 0.5355, "step": 6470 }, { "epoch": 0.82, "grad_norm": 0.6681859347938008, "learning_rate": 6.633892235598971e-06, "loss": 0.4999, "step": 6471 }, { "epoch": 0.82, "grad_norm": 0.6195650881913226, "learning_rate": 6.6329172476987565e-06, "loss": 0.521, "step": 6472 }, { "epoch": 0.82, "grad_norm": 0.5777079027188079, "learning_rate": 6.631942190290474e-06, "loss": 0.5668, "step": 6473 }, { "epoch": 0.82, "grad_norm": 0.7482830965645356, "learning_rate": 6.6309670634156265e-06, "loss": 0.6085, "step": 6474 }, { "epoch": 0.82, "grad_norm": 0.6393661927024398, "learning_rate": 6.629991867115724e-06, "loss": 0.5, "step": 6475 }, { "epoch": 0.83, "grad_norm": 0.6625920334685923, "learning_rate": 6.629016601432275e-06, "loss": 0.4856, "step": 6476 }, { "epoch": 0.83, "grad_norm": 0.5970890277792154, "learning_rate": 6.628041266406795e-06, "loss": 0.5153, "step": 6477 }, { "epoch": 0.83, "grad_norm": 0.7767704226989249, "learning_rate": 6.6270658620808015e-06, "loss": 0.6396, "step": 6478 }, { "epoch": 0.83, "grad_norm": 0.8129509103335189, "learning_rate": 6.6260903884958135e-06, "loss": 0.605, "step": 6479 }, { "epoch": 0.83, "grad_norm": 0.7574390987238969, "learning_rate": 6.625114845693353e-06, "loss": 0.6333, "step": 6480 }, { "epoch": 0.83, "grad_norm": 0.83783835637728, "learning_rate": 6.624139233714948e-06, "loss": 0.5886, "step": 6481 }, { "epoch": 0.83, "grad_norm": 0.5744249648369125, "learning_rate": 6.623163552602125e-06, "loss": 0.4702, "step": 6482 }, { "epoch": 0.83, "grad_norm": 0.6065719104766597, "learning_rate": 6.622187802396416e-06, "loss": 0.5621, "step": 6483 }, { "epoch": 0.83, "grad_norm": 0.6252720927272606, "learning_rate": 6.6212119831393574e-06, "loss": 0.526, "step": 6484 }, { "epoch": 0.83, "grad_norm": 0.7300166847462839, "learning_rate": 6.620236094872485e-06, "loss": 0.5268, "step": 6485 }, { "epoch": 0.83, "grad_norm": 0.5193651488662671, "learning_rate": 6.619260137637339e-06, "loss": 0.4615, "step": 6486 }, { "epoch": 0.83, "grad_norm": 0.6086862260498218, "learning_rate": 6.618284111475464e-06, "loss": 0.5297, "step": 6487 }, { "epoch": 0.83, "grad_norm": 0.7110119872791875, "learning_rate": 6.617308016428405e-06, "loss": 0.614, "step": 6488 }, { "epoch": 0.83, "grad_norm": 0.7106997797929842, "learning_rate": 6.616331852537712e-06, "loss": 0.5398, "step": 6489 }, { "epoch": 0.83, "grad_norm": 0.7885781093659249, "learning_rate": 6.615355619844937e-06, "loss": 0.6141, "step": 6490 }, { "epoch": 0.83, "grad_norm": 0.5541225600642373, "learning_rate": 6.614379318391635e-06, "loss": 0.4978, "step": 6491 }, { "epoch": 0.83, "grad_norm": 0.643156282920605, "learning_rate": 6.613402948219365e-06, "loss": 0.5638, "step": 6492 }, { "epoch": 0.83, "grad_norm": 0.6530901853978821, "learning_rate": 6.6124265093696874e-06, "loss": 0.5167, "step": 6493 }, { "epoch": 0.83, "grad_norm": 0.7271421660121189, "learning_rate": 6.6114500018841655e-06, "loss": 0.6107, "step": 6494 }, { "epoch": 0.83, "grad_norm": 1.0035529231353035, "learning_rate": 6.6104734258043655e-06, "loss": 0.5976, "step": 6495 }, { "epoch": 0.83, "grad_norm": 0.5941332092616888, "learning_rate": 6.609496781171859e-06, "loss": 0.505, "step": 6496 }, { "epoch": 0.83, "grad_norm": 0.6774864696177553, "learning_rate": 6.608520068028218e-06, "loss": 0.5789, "step": 6497 }, { "epoch": 0.83, "grad_norm": 0.6889388082929483, "learning_rate": 6.607543286415017e-06, "loss": 0.604, "step": 6498 }, { "epoch": 0.83, "grad_norm": 1.0277921515906445, "learning_rate": 6.606566436373836e-06, "loss": 0.6065, "step": 6499 }, { "epoch": 0.83, "grad_norm": 0.6169010073526765, "learning_rate": 6.605589517946256e-06, "loss": 0.5183, "step": 6500 }, { "epoch": 0.83, "grad_norm": 0.7065624553356541, "learning_rate": 6.6046125311738595e-06, "loss": 0.5776, "step": 6501 }, { "epoch": 0.83, "grad_norm": 0.8840570022135378, "learning_rate": 6.603635476098236e-06, "loss": 0.6505, "step": 6502 }, { "epoch": 0.83, "grad_norm": 0.7868794113995556, "learning_rate": 6.602658352760975e-06, "loss": 0.6901, "step": 6503 }, { "epoch": 0.83, "grad_norm": 0.7150974512226105, "learning_rate": 6.6016811612036695e-06, "loss": 0.5406, "step": 6504 }, { "epoch": 0.83, "grad_norm": 0.5630857591089643, "learning_rate": 6.600703901467914e-06, "loss": 0.5951, "step": 6505 }, { "epoch": 0.83, "grad_norm": 0.5373656574791883, "learning_rate": 6.599726573595309e-06, "loss": 0.4648, "step": 6506 }, { "epoch": 0.83, "grad_norm": 0.8166571002152593, "learning_rate": 6.598749177627456e-06, "loss": 0.6198, "step": 6507 }, { "epoch": 0.83, "grad_norm": 0.6807433614817046, "learning_rate": 6.59777171360596e-06, "loss": 0.6431, "step": 6508 }, { "epoch": 0.83, "grad_norm": 0.7913926797743724, "learning_rate": 6.596794181572428e-06, "loss": 0.5567, "step": 6509 }, { "epoch": 0.83, "grad_norm": 0.6324205471993662, "learning_rate": 6.5958165815684696e-06, "loss": 0.5484, "step": 6510 }, { "epoch": 0.83, "grad_norm": 0.6253357026129646, "learning_rate": 6.594838913635698e-06, "loss": 0.5301, "step": 6511 }, { "epoch": 0.83, "grad_norm": 0.5970319795024827, "learning_rate": 6.5938611778157315e-06, "loss": 0.533, "step": 6512 }, { "epoch": 0.83, "grad_norm": 0.5828355647198787, "learning_rate": 6.5928833741501875e-06, "loss": 0.4857, "step": 6513 }, { "epoch": 0.83, "grad_norm": 0.6678940477959378, "learning_rate": 6.591905502680688e-06, "loss": 0.5708, "step": 6514 }, { "epoch": 0.83, "grad_norm": 0.63703960912389, "learning_rate": 6.590927563448859e-06, "loss": 0.519, "step": 6515 }, { "epoch": 0.83, "grad_norm": 0.7370632149374999, "learning_rate": 6.5899495564963266e-06, "loss": 0.6372, "step": 6516 }, { "epoch": 0.83, "grad_norm": 0.5803613925385694, "learning_rate": 6.588971481864723e-06, "loss": 0.5234, "step": 6517 }, { "epoch": 0.83, "grad_norm": 0.6550923523037336, "learning_rate": 6.587993339595682e-06, "loss": 0.5217, "step": 6518 }, { "epoch": 0.83, "grad_norm": 0.7389184340573136, "learning_rate": 6.587015129730839e-06, "loss": 0.5622, "step": 6519 }, { "epoch": 0.83, "grad_norm": 0.8541712774022205, "learning_rate": 6.5860368523118305e-06, "loss": 0.6901, "step": 6520 }, { "epoch": 0.83, "grad_norm": 0.6462953914893367, "learning_rate": 6.585058507380303e-06, "loss": 0.5339, "step": 6521 }, { "epoch": 0.83, "grad_norm": 0.7844240914151441, "learning_rate": 6.584080094977901e-06, "loss": 0.6381, "step": 6522 }, { "epoch": 0.83, "grad_norm": 0.8928868013268039, "learning_rate": 6.58310161514627e-06, "loss": 0.6651, "step": 6523 }, { "epoch": 0.83, "grad_norm": 0.8344502485279831, "learning_rate": 6.582123067927062e-06, "loss": 0.6399, "step": 6524 }, { "epoch": 0.83, "grad_norm": 0.6902543562148888, "learning_rate": 6.581144453361932e-06, "loss": 0.5723, "step": 6525 }, { "epoch": 0.83, "grad_norm": 0.7606838378903613, "learning_rate": 6.580165771492535e-06, "loss": 0.6172, "step": 6526 }, { "epoch": 0.83, "grad_norm": 0.7680349264800025, "learning_rate": 6.57918702236053e-06, "loss": 0.5181, "step": 6527 }, { "epoch": 0.83, "grad_norm": 0.681643533752028, "learning_rate": 6.5782082060075796e-06, "loss": 0.5406, "step": 6528 }, { "epoch": 0.83, "grad_norm": 0.6649957214325597, "learning_rate": 6.5772293224753496e-06, "loss": 0.5044, "step": 6529 }, { "epoch": 0.83, "grad_norm": 0.66792564259355, "learning_rate": 6.576250371805507e-06, "loss": 0.5156, "step": 6530 }, { "epoch": 0.83, "grad_norm": 0.6299622261759319, "learning_rate": 6.575271354039724e-06, "loss": 0.5279, "step": 6531 }, { "epoch": 0.83, "grad_norm": 0.881512906590141, "learning_rate": 6.5742922692196734e-06, "loss": 0.6224, "step": 6532 }, { "epoch": 0.83, "grad_norm": 0.6564445611536593, "learning_rate": 6.57331311738703e-06, "loss": 0.5309, "step": 6533 }, { "epoch": 0.83, "grad_norm": 0.8131860687793555, "learning_rate": 6.572333898583476e-06, "loss": 0.6158, "step": 6534 }, { "epoch": 0.83, "grad_norm": 0.6407124194578098, "learning_rate": 6.571354612850693e-06, "loss": 0.5341, "step": 6535 }, { "epoch": 0.83, "grad_norm": 0.7328902208857833, "learning_rate": 6.570375260230364e-06, "loss": 0.5881, "step": 6536 }, { "epoch": 0.83, "grad_norm": 0.8464478431660932, "learning_rate": 6.56939584076418e-06, "loss": 0.6299, "step": 6537 }, { "epoch": 0.83, "grad_norm": 0.7191489096232528, "learning_rate": 6.568416354493827e-06, "loss": 0.5755, "step": 6538 }, { "epoch": 0.83, "grad_norm": 0.7365817050537748, "learning_rate": 6.5674368014610044e-06, "loss": 0.5944, "step": 6539 }, { "epoch": 0.83, "grad_norm": 1.0455182794406508, "learning_rate": 6.5664571817074065e-06, "loss": 0.6279, "step": 6540 }, { "epoch": 0.83, "grad_norm": 0.6900667703590811, "learning_rate": 6.56547749527473e-06, "loss": 0.5508, "step": 6541 }, { "epoch": 0.83, "grad_norm": 0.7366802134564446, "learning_rate": 6.5644977422046805e-06, "loss": 0.6097, "step": 6542 }, { "epoch": 0.83, "grad_norm": 0.5854236989657675, "learning_rate": 6.5635179225389615e-06, "loss": 0.52, "step": 6543 }, { "epoch": 0.83, "grad_norm": 0.7334019559981301, "learning_rate": 6.5625380363192795e-06, "loss": 0.5177, "step": 6544 }, { "epoch": 0.83, "grad_norm": 0.8178026265013155, "learning_rate": 6.5615580835873476e-06, "loss": 0.6622, "step": 6545 }, { "epoch": 0.83, "grad_norm": 1.082534676157764, "learning_rate": 6.560578064384879e-06, "loss": 0.5514, "step": 6546 }, { "epoch": 0.83, "grad_norm": 0.6947063486131437, "learning_rate": 6.5595979787535865e-06, "loss": 0.6005, "step": 6547 }, { "epoch": 0.83, "grad_norm": 0.716733835205423, "learning_rate": 6.558617826735194e-06, "loss": 0.612, "step": 6548 }, { "epoch": 0.83, "grad_norm": 0.7176508683791621, "learning_rate": 6.557637608371421e-06, "loss": 0.5973, "step": 6549 }, { "epoch": 0.83, "grad_norm": 0.6078637995408926, "learning_rate": 6.5566573237039925e-06, "loss": 0.5557, "step": 6550 }, { "epoch": 0.83, "grad_norm": 0.7333544028476878, "learning_rate": 6.555676972774634e-06, "loss": 0.6464, "step": 6551 }, { "epoch": 0.83, "grad_norm": 0.562588182862289, "learning_rate": 6.554696555625079e-06, "loss": 0.4827, "step": 6552 }, { "epoch": 0.83, "grad_norm": 0.8022970561682957, "learning_rate": 6.553716072297061e-06, "loss": 0.6269, "step": 6553 }, { "epoch": 0.83, "grad_norm": 0.7294224063774781, "learning_rate": 6.552735522832314e-06, "loss": 0.6535, "step": 6554 }, { "epoch": 0.84, "grad_norm": 0.607388750049785, "learning_rate": 6.551754907272578e-06, "loss": 0.5564, "step": 6555 }, { "epoch": 0.84, "grad_norm": 0.6561026084748848, "learning_rate": 6.550774225659594e-06, "loss": 0.517, "step": 6556 }, { "epoch": 0.84, "grad_norm": 0.5607195291470843, "learning_rate": 6.549793478035107e-06, "loss": 0.4973, "step": 6557 }, { "epoch": 0.84, "grad_norm": 0.7643485170954416, "learning_rate": 6.548812664440864e-06, "loss": 0.6615, "step": 6558 }, { "epoch": 0.84, "grad_norm": 0.5919573705467266, "learning_rate": 6.547831784918614e-06, "loss": 0.4944, "step": 6559 }, { "epoch": 0.84, "grad_norm": 0.5682581819493193, "learning_rate": 6.546850839510113e-06, "loss": 0.5503, "step": 6560 }, { "epoch": 0.84, "grad_norm": 0.6050789187504977, "learning_rate": 6.545869828257114e-06, "loss": 0.5004, "step": 6561 }, { "epoch": 0.84, "grad_norm": 0.7396563529524641, "learning_rate": 6.5448887512013755e-06, "loss": 0.6026, "step": 6562 }, { "epoch": 0.84, "grad_norm": 0.7393027655715211, "learning_rate": 6.5439076083846595e-06, "loss": 0.6695, "step": 6563 }, { "epoch": 0.84, "grad_norm": 0.6957350790728904, "learning_rate": 6.542926399848732e-06, "loss": 0.5995, "step": 6564 }, { "epoch": 0.84, "grad_norm": 0.5852414204167266, "learning_rate": 6.541945125635357e-06, "loss": 0.5134, "step": 6565 }, { "epoch": 0.84, "grad_norm": 0.8122329245922367, "learning_rate": 6.5409637857863066e-06, "loss": 0.6121, "step": 6566 }, { "epoch": 0.84, "grad_norm": 0.6456744614721865, "learning_rate": 6.539982380343351e-06, "loss": 0.5703, "step": 6567 }, { "epoch": 0.84, "grad_norm": 0.6256095112322341, "learning_rate": 6.539000909348266e-06, "loss": 0.4991, "step": 6568 }, { "epoch": 0.84, "grad_norm": 0.661420890309588, "learning_rate": 6.5380193728428294e-06, "loss": 0.4821, "step": 6569 }, { "epoch": 0.84, "grad_norm": 0.6184074991915045, "learning_rate": 6.537037770868825e-06, "loss": 0.6062, "step": 6570 }, { "epoch": 0.84, "grad_norm": 0.6914185402351574, "learning_rate": 6.536056103468034e-06, "loss": 0.5737, "step": 6571 }, { "epoch": 0.84, "grad_norm": 0.7803554062738187, "learning_rate": 6.535074370682243e-06, "loss": 0.6205, "step": 6572 }, { "epoch": 0.84, "grad_norm": 0.7003952693039684, "learning_rate": 6.534092572553241e-06, "loss": 0.5838, "step": 6573 }, { "epoch": 0.84, "grad_norm": 0.6554043794474748, "learning_rate": 6.533110709122821e-06, "loss": 0.5549, "step": 6574 }, { "epoch": 0.84, "grad_norm": 0.757956701525781, "learning_rate": 6.532128780432776e-06, "loss": 0.5991, "step": 6575 }, { "epoch": 0.84, "grad_norm": 0.621540951032334, "learning_rate": 6.5311467865249064e-06, "loss": 0.5298, "step": 6576 }, { "epoch": 0.84, "grad_norm": 0.6822587431034667, "learning_rate": 6.5301647274410105e-06, "loss": 0.5544, "step": 6577 }, { "epoch": 0.84, "grad_norm": 1.0023514807622687, "learning_rate": 6.529182603222892e-06, "loss": 0.5842, "step": 6578 }, { "epoch": 0.84, "grad_norm": 0.5573906384691116, "learning_rate": 6.528200413912357e-06, "loss": 0.4875, "step": 6579 }, { "epoch": 0.84, "grad_norm": 0.7442181351488076, "learning_rate": 6.527218159551213e-06, "loss": 0.6373, "step": 6580 }, { "epoch": 0.84, "grad_norm": 0.711695797430948, "learning_rate": 6.526235840181272e-06, "loss": 0.5052, "step": 6581 }, { "epoch": 0.84, "grad_norm": 0.6432289557796902, "learning_rate": 6.525253455844348e-06, "loss": 0.5628, "step": 6582 }, { "epoch": 0.84, "grad_norm": 0.6137086632156515, "learning_rate": 6.524271006582259e-06, "loss": 0.5217, "step": 6583 }, { "epoch": 0.84, "grad_norm": 0.5704356839424467, "learning_rate": 6.523288492436826e-06, "loss": 0.5548, "step": 6584 }, { "epoch": 0.84, "grad_norm": 0.6205227296711358, "learning_rate": 6.522305913449867e-06, "loss": 0.5726, "step": 6585 }, { "epoch": 0.84, "grad_norm": 0.7307209198799085, "learning_rate": 6.521323269663211e-06, "loss": 0.5787, "step": 6586 }, { "epoch": 0.84, "grad_norm": 0.5419852748657038, "learning_rate": 6.5203405611186855e-06, "loss": 0.4894, "step": 6587 }, { "epoch": 0.84, "grad_norm": 0.7233683517376814, "learning_rate": 6.519357787858119e-06, "loss": 0.6103, "step": 6588 }, { "epoch": 0.84, "grad_norm": 0.6430593217668235, "learning_rate": 6.518374949923347e-06, "loss": 0.5225, "step": 6589 }, { "epoch": 0.84, "grad_norm": 0.7916466069694474, "learning_rate": 6.517392047356205e-06, "loss": 0.5987, "step": 6590 }, { "epoch": 0.84, "grad_norm": 1.2594116491866636, "learning_rate": 6.516409080198535e-06, "loss": 0.6445, "step": 6591 }, { "epoch": 0.84, "grad_norm": 0.5840893942174273, "learning_rate": 6.5154260484921735e-06, "loss": 0.5139, "step": 6592 }, { "epoch": 0.84, "grad_norm": 0.7183332341164766, "learning_rate": 6.51444295227897e-06, "loss": 0.5961, "step": 6593 }, { "epoch": 0.84, "grad_norm": 0.8634584307887804, "learning_rate": 6.513459791600768e-06, "loss": 0.6501, "step": 6594 }, { "epoch": 0.84, "grad_norm": 0.7755051665441314, "learning_rate": 6.51247656649942e-06, "loss": 0.6365, "step": 6595 }, { "epoch": 0.84, "grad_norm": 1.19429483664414, "learning_rate": 6.511493277016776e-06, "loss": 0.574, "step": 6596 }, { "epoch": 0.84, "grad_norm": 0.6991586381722998, "learning_rate": 6.510509923194694e-06, "loss": 0.56, "step": 6597 }, { "epoch": 0.84, "grad_norm": 0.5917032495669147, "learning_rate": 6.509526505075032e-06, "loss": 0.5403, "step": 6598 }, { "epoch": 0.84, "grad_norm": 0.6509379503007197, "learning_rate": 6.508543022699652e-06, "loss": 0.6081, "step": 6599 }, { "epoch": 0.84, "grad_norm": 0.6189794787068471, "learning_rate": 6.5075594761104126e-06, "loss": 0.5519, "step": 6600 }, { "epoch": 0.84, "grad_norm": 0.6791622848975046, "learning_rate": 6.506575865349185e-06, "loss": 0.5481, "step": 6601 }, { "epoch": 0.84, "grad_norm": 0.7531039358858942, "learning_rate": 6.505592190457838e-06, "loss": 0.6141, "step": 6602 }, { "epoch": 0.84, "grad_norm": 0.8505550260254032, "learning_rate": 6.504608451478242e-06, "loss": 0.5806, "step": 6603 }, { "epoch": 0.84, "grad_norm": 1.0209036518929284, "learning_rate": 6.503624648452273e-06, "loss": 0.6671, "step": 6604 }, { "epoch": 0.84, "grad_norm": 0.6238119148373528, "learning_rate": 6.502640781421807e-06, "loss": 0.5509, "step": 6605 }, { "epoch": 0.84, "grad_norm": 1.0555480266865047, "learning_rate": 6.501656850428724e-06, "loss": 0.6019, "step": 6606 }, { "epoch": 0.84, "grad_norm": 0.5895111872159797, "learning_rate": 6.500672855514909e-06, "loss": 0.5206, "step": 6607 }, { "epoch": 0.84, "grad_norm": 0.7822501077831937, "learning_rate": 6.499688796722244e-06, "loss": 0.6117, "step": 6608 }, { "epoch": 0.84, "grad_norm": 0.6030800136079699, "learning_rate": 6.498704674092622e-06, "loss": 0.4907, "step": 6609 }, { "epoch": 0.84, "grad_norm": 0.7311892330290485, "learning_rate": 6.497720487667929e-06, "loss": 0.6285, "step": 6610 }, { "epoch": 0.84, "grad_norm": 0.6095372510185418, "learning_rate": 6.496736237490062e-06, "loss": 0.5183, "step": 6611 }, { "epoch": 0.84, "grad_norm": 0.6074076574843538, "learning_rate": 6.4957519236009156e-06, "loss": 0.5039, "step": 6612 }, { "epoch": 0.84, "grad_norm": 0.7935921019013585, "learning_rate": 6.494767546042389e-06, "loss": 0.5032, "step": 6613 }, { "epoch": 0.84, "grad_norm": 0.7714659687510147, "learning_rate": 6.493783104856386e-06, "loss": 0.6196, "step": 6614 }, { "epoch": 0.84, "grad_norm": 0.5924889971866503, "learning_rate": 6.492798600084809e-06, "loss": 0.5081, "step": 6615 }, { "epoch": 0.84, "grad_norm": 0.5687662927402892, "learning_rate": 6.491814031769566e-06, "loss": 0.4873, "step": 6616 }, { "epoch": 0.84, "grad_norm": 0.7601024218974984, "learning_rate": 6.4908293999525675e-06, "loss": 0.6131, "step": 6617 }, { "epoch": 0.84, "grad_norm": 1.1927784118332903, "learning_rate": 6.489844704675724e-06, "loss": 0.6214, "step": 6618 }, { "epoch": 0.84, "grad_norm": 0.7791536561609305, "learning_rate": 6.488859945980952e-06, "loss": 0.6187, "step": 6619 }, { "epoch": 0.84, "grad_norm": 0.6684148761859198, "learning_rate": 6.4878751239101715e-06, "loss": 0.5644, "step": 6620 }, { "epoch": 0.84, "grad_norm": 0.7172590504768079, "learning_rate": 6.486890238505301e-06, "loss": 0.5982, "step": 6621 }, { "epoch": 0.84, "grad_norm": 0.6198513943762066, "learning_rate": 6.485905289808264e-06, "loss": 0.5062, "step": 6622 }, { "epoch": 0.84, "grad_norm": 0.6007609126335464, "learning_rate": 6.484920277860988e-06, "loss": 0.5225, "step": 6623 }, { "epoch": 0.84, "grad_norm": 0.6454183493838301, "learning_rate": 6.4839352027054e-06, "loss": 0.519, "step": 6624 }, { "epoch": 0.84, "grad_norm": 0.7218154042427212, "learning_rate": 6.482950064383432e-06, "loss": 0.5352, "step": 6625 }, { "epoch": 0.84, "grad_norm": 0.7128891104835277, "learning_rate": 6.481964862937019e-06, "loss": 0.5917, "step": 6626 }, { "epoch": 0.84, "grad_norm": 0.6481267742263872, "learning_rate": 6.480979598408098e-06, "loss": 0.4986, "step": 6627 }, { "epoch": 0.84, "grad_norm": 0.576561963314315, "learning_rate": 6.479994270838608e-06, "loss": 0.5241, "step": 6628 }, { "epoch": 0.84, "grad_norm": 0.7723381342276889, "learning_rate": 6.479008880270491e-06, "loss": 0.6232, "step": 6629 }, { "epoch": 0.84, "grad_norm": 0.5712802892892588, "learning_rate": 6.478023426745692e-06, "loss": 0.5356, "step": 6630 }, { "epoch": 0.84, "grad_norm": 0.6218614701393628, "learning_rate": 6.477037910306159e-06, "loss": 0.4964, "step": 6631 }, { "epoch": 0.84, "grad_norm": 0.7774841211352943, "learning_rate": 6.476052330993842e-06, "loss": 0.6049, "step": 6632 }, { "epoch": 0.85, "grad_norm": 0.5631581229202345, "learning_rate": 6.475066688850694e-06, "loss": 0.4696, "step": 6633 }, { "epoch": 0.85, "grad_norm": 0.5174789809951127, "learning_rate": 6.474080983918671e-06, "loss": 0.4532, "step": 6634 }, { "epoch": 0.85, "grad_norm": 0.6606388601648999, "learning_rate": 6.473095216239731e-06, "loss": 0.5051, "step": 6635 }, { "epoch": 0.85, "grad_norm": 0.8533707933848041, "learning_rate": 6.472109385855836e-06, "loss": 0.6202, "step": 6636 }, { "epoch": 0.85, "grad_norm": 0.6558808481903375, "learning_rate": 6.471123492808946e-06, "loss": 0.4936, "step": 6637 }, { "epoch": 0.85, "grad_norm": 0.7955505713043879, "learning_rate": 6.470137537141031e-06, "loss": 0.6064, "step": 6638 }, { "epoch": 0.85, "grad_norm": 0.7829917928962898, "learning_rate": 6.46915151889406e-06, "loss": 0.6635, "step": 6639 }, { "epoch": 0.85, "grad_norm": 0.770157181848176, "learning_rate": 6.468165438110004e-06, "loss": 0.5681, "step": 6640 }, { "epoch": 0.85, "grad_norm": 0.6250488830721793, "learning_rate": 6.467179294830835e-06, "loss": 0.5073, "step": 6641 }, { "epoch": 0.85, "grad_norm": 0.6868334415337548, "learning_rate": 6.4661930890985335e-06, "loss": 0.5617, "step": 6642 }, { "epoch": 0.85, "grad_norm": 0.7920069716883784, "learning_rate": 6.465206820955076e-06, "loss": 0.6303, "step": 6643 }, { "epoch": 0.85, "grad_norm": 0.715335768436534, "learning_rate": 6.464220490442446e-06, "loss": 0.6082, "step": 6644 }, { "epoch": 0.85, "grad_norm": 0.7336124810956771, "learning_rate": 6.463234097602631e-06, "loss": 0.4735, "step": 6645 }, { "epoch": 0.85, "grad_norm": 0.7202239309172377, "learning_rate": 6.462247642477615e-06, "loss": 0.545, "step": 6646 }, { "epoch": 0.85, "grad_norm": 0.5941892673870489, "learning_rate": 6.461261125109389e-06, "loss": 0.5382, "step": 6647 }, { "epoch": 0.85, "grad_norm": 0.5826192815089393, "learning_rate": 6.460274545539947e-06, "loss": 0.5037, "step": 6648 }, { "epoch": 0.85, "grad_norm": 0.8123260487299397, "learning_rate": 6.459287903811285e-06, "loss": 0.5952, "step": 6649 }, { "epoch": 0.85, "grad_norm": 0.9380444776394964, "learning_rate": 6.458301199965398e-06, "loss": 0.6765, "step": 6650 }, { "epoch": 0.85, "grad_norm": 0.5444336617343107, "learning_rate": 6.457314434044289e-06, "loss": 0.5142, "step": 6651 }, { "epoch": 0.85, "grad_norm": 1.0043128034882705, "learning_rate": 6.456327606089962e-06, "loss": 0.6496, "step": 6652 }, { "epoch": 0.85, "grad_norm": 0.8263048961454296, "learning_rate": 6.455340716144425e-06, "loss": 0.6103, "step": 6653 }, { "epoch": 0.85, "grad_norm": 0.5912204655334908, "learning_rate": 6.4543537642496816e-06, "loss": 0.5506, "step": 6654 }, { "epoch": 0.85, "grad_norm": 0.5919250552538146, "learning_rate": 6.453366750447747e-06, "loss": 0.545, "step": 6655 }, { "epoch": 0.85, "grad_norm": 0.5435661626258214, "learning_rate": 6.452379674780633e-06, "loss": 0.5013, "step": 6656 }, { "epoch": 0.85, "grad_norm": 0.7510925673391442, "learning_rate": 6.45139253729036e-06, "loss": 0.5604, "step": 6657 }, { "epoch": 0.85, "grad_norm": 0.8422280437779414, "learning_rate": 6.450405338018943e-06, "loss": 0.689, "step": 6658 }, { "epoch": 0.85, "grad_norm": 0.7140162987163611, "learning_rate": 6.449418077008405e-06, "loss": 0.6538, "step": 6659 }, { "epoch": 0.85, "grad_norm": 0.5467812951779568, "learning_rate": 6.448430754300772e-06, "loss": 0.4883, "step": 6660 }, { "epoch": 0.85, "grad_norm": 0.714976472578544, "learning_rate": 6.44744336993807e-06, "loss": 0.5659, "step": 6661 }, { "epoch": 0.85, "grad_norm": 0.8578209927853342, "learning_rate": 6.446455923962328e-06, "loss": 0.5807, "step": 6662 }, { "epoch": 0.85, "grad_norm": 0.7644429897205445, "learning_rate": 6.44546841641558e-06, "loss": 0.5787, "step": 6663 }, { "epoch": 0.85, "grad_norm": 0.6509814584879079, "learning_rate": 6.444480847339862e-06, "loss": 0.5612, "step": 6664 }, { "epoch": 0.85, "grad_norm": 0.599574322432731, "learning_rate": 6.443493216777209e-06, "loss": 0.539, "step": 6665 }, { "epoch": 0.85, "grad_norm": 0.6536085707771628, "learning_rate": 6.442505524769663e-06, "loss": 0.548, "step": 6666 }, { "epoch": 0.85, "grad_norm": 0.8134178203412349, "learning_rate": 6.441517771359267e-06, "loss": 0.6095, "step": 6667 }, { "epoch": 0.85, "grad_norm": 0.6252589088357704, "learning_rate": 6.440529956588064e-06, "loss": 0.4954, "step": 6668 }, { "epoch": 0.85, "grad_norm": 0.5902260662712403, "learning_rate": 6.439542080498105e-06, "loss": 0.5406, "step": 6669 }, { "epoch": 0.85, "grad_norm": 0.660871271968508, "learning_rate": 6.43855414313144e-06, "loss": 0.5505, "step": 6670 }, { "epoch": 0.85, "grad_norm": 0.6412905588827149, "learning_rate": 6.437566144530123e-06, "loss": 0.5753, "step": 6671 }, { "epoch": 0.85, "grad_norm": 0.504343457256404, "learning_rate": 6.436578084736207e-06, "loss": 0.4828, "step": 6672 }, { "epoch": 0.85, "grad_norm": 0.6024850680333286, "learning_rate": 6.435589963791754e-06, "loss": 0.5062, "step": 6673 }, { "epoch": 0.85, "grad_norm": 0.7674430865032238, "learning_rate": 6.434601781738824e-06, "loss": 0.5023, "step": 6674 }, { "epoch": 0.85, "grad_norm": 0.8091307097736599, "learning_rate": 6.433613538619479e-06, "loss": 0.5952, "step": 6675 }, { "epoch": 0.85, "grad_norm": 0.7935332083526491, "learning_rate": 6.432625234475788e-06, "loss": 0.6256, "step": 6676 }, { "epoch": 0.85, "grad_norm": 0.8335983934485832, "learning_rate": 6.431636869349818e-06, "loss": 0.6417, "step": 6677 }, { "epoch": 0.85, "grad_norm": 0.7133436863802591, "learning_rate": 6.430648443283642e-06, "loss": 0.6089, "step": 6678 }, { "epoch": 0.85, "grad_norm": 0.7133959633483786, "learning_rate": 6.4296599563193344e-06, "loss": 0.6138, "step": 6679 }, { "epoch": 0.85, "grad_norm": 0.825994999486831, "learning_rate": 6.428671408498969e-06, "loss": 0.5563, "step": 6680 }, { "epoch": 0.85, "grad_norm": 0.6029731726032074, "learning_rate": 6.427682799864628e-06, "loss": 0.5331, "step": 6681 }, { "epoch": 0.85, "grad_norm": 0.6042192317360487, "learning_rate": 6.426694130458392e-06, "loss": 0.5252, "step": 6682 }, { "epoch": 0.85, "grad_norm": 0.6103765821083451, "learning_rate": 6.425705400322346e-06, "loss": 0.5342, "step": 6683 }, { "epoch": 0.85, "grad_norm": 0.712333074987951, "learning_rate": 6.424716609498578e-06, "loss": 0.6003, "step": 6684 }, { "epoch": 0.85, "grad_norm": 0.7858545673161307, "learning_rate": 6.423727758029175e-06, "loss": 0.5751, "step": 6685 }, { "epoch": 0.85, "grad_norm": 0.6271358504217927, "learning_rate": 6.422738845956232e-06, "loss": 0.5103, "step": 6686 }, { "epoch": 0.85, "grad_norm": 0.6542777104160439, "learning_rate": 6.421749873321843e-06, "loss": 0.5235, "step": 6687 }, { "epoch": 0.85, "grad_norm": 0.8814752299022276, "learning_rate": 6.4207608401681045e-06, "loss": 0.5681, "step": 6688 }, { "epoch": 0.85, "grad_norm": 0.6455099468820157, "learning_rate": 6.419771746537118e-06, "loss": 0.5141, "step": 6689 }, { "epoch": 0.85, "grad_norm": 0.7389888470521515, "learning_rate": 6.418782592470985e-06, "loss": 0.584, "step": 6690 }, { "epoch": 0.85, "grad_norm": 0.6119270179492299, "learning_rate": 6.4177933780118094e-06, "loss": 0.5489, "step": 6691 }, { "epoch": 0.85, "grad_norm": 0.8741890718063151, "learning_rate": 6.4168041032017026e-06, "loss": 0.6322, "step": 6692 }, { "epoch": 0.85, "grad_norm": 0.6098622082197017, "learning_rate": 6.41581476808277e-06, "loss": 0.5259, "step": 6693 }, { "epoch": 0.85, "grad_norm": 0.780727278145902, "learning_rate": 6.4148253726971286e-06, "loss": 0.618, "step": 6694 }, { "epoch": 0.85, "grad_norm": 0.6646163861176623, "learning_rate": 6.413835917086892e-06, "loss": 0.5742, "step": 6695 }, { "epoch": 0.85, "grad_norm": 0.7168848602542783, "learning_rate": 6.4128464012941795e-06, "loss": 0.5396, "step": 6696 }, { "epoch": 0.85, "grad_norm": 0.5705818002988751, "learning_rate": 6.411856825361109e-06, "loss": 0.5351, "step": 6697 }, { "epoch": 0.85, "grad_norm": 0.7337958905809268, "learning_rate": 6.410867189329806e-06, "loss": 0.5732, "step": 6698 }, { "epoch": 0.85, "grad_norm": 1.1460587253276808, "learning_rate": 6.4098774932423935e-06, "loss": 0.5923, "step": 6699 }, { "epoch": 0.85, "grad_norm": 0.7969143690271436, "learning_rate": 6.408887737141003e-06, "loss": 0.598, "step": 6700 }, { "epoch": 0.85, "grad_norm": 0.6012642235502653, "learning_rate": 6.407897921067763e-06, "loss": 0.5133, "step": 6701 }, { "epoch": 0.85, "grad_norm": 0.6241253434314572, "learning_rate": 6.406908045064808e-06, "loss": 0.5352, "step": 6702 }, { "epoch": 0.85, "grad_norm": 0.603500412991251, "learning_rate": 6.405918109174274e-06, "loss": 0.5635, "step": 6703 }, { "epoch": 0.85, "grad_norm": 0.7130743433358312, "learning_rate": 6.404928113438298e-06, "loss": 0.6062, "step": 6704 }, { "epoch": 0.85, "grad_norm": 0.8390679425955562, "learning_rate": 6.403938057899021e-06, "loss": 0.5896, "step": 6705 }, { "epoch": 0.85, "grad_norm": 0.6003620047611676, "learning_rate": 6.402947942598588e-06, "loss": 0.4483, "step": 6706 }, { "epoch": 0.85, "grad_norm": 0.7809477775062855, "learning_rate": 6.401957767579143e-06, "loss": 0.6145, "step": 6707 }, { "epoch": 0.85, "grad_norm": 0.7214247044322019, "learning_rate": 6.400967532882838e-06, "loss": 0.5197, "step": 6708 }, { "epoch": 0.85, "grad_norm": 1.0330520167490873, "learning_rate": 6.399977238551821e-06, "loss": 0.6049, "step": 6709 }, { "epoch": 0.85, "grad_norm": 0.538280732392651, "learning_rate": 6.398986884628246e-06, "loss": 0.5185, "step": 6710 }, { "epoch": 0.85, "grad_norm": 0.7801000993264211, "learning_rate": 6.397996471154272e-06, "loss": 0.6316, "step": 6711 }, { "epoch": 0.86, "grad_norm": 0.6546423021000509, "learning_rate": 6.3970059981720526e-06, "loss": 0.5182, "step": 6712 }, { "epoch": 0.86, "grad_norm": 0.8467742519251735, "learning_rate": 6.396015465723754e-06, "loss": 0.6319, "step": 6713 }, { "epoch": 0.86, "grad_norm": 0.6799915195617798, "learning_rate": 6.395024873851537e-06, "loss": 0.5505, "step": 6714 }, { "epoch": 0.86, "grad_norm": 0.7187111517934552, "learning_rate": 6.394034222597571e-06, "loss": 0.5635, "step": 6715 }, { "epoch": 0.86, "grad_norm": 0.6112867117847852, "learning_rate": 6.393043512004022e-06, "loss": 0.5009, "step": 6716 }, { "epoch": 0.86, "grad_norm": 0.7582173584268032, "learning_rate": 6.39205274211306e-06, "loss": 0.6385, "step": 6717 }, { "epoch": 0.86, "grad_norm": 0.6696693004498872, "learning_rate": 6.391061912966864e-06, "loss": 0.5543, "step": 6718 }, { "epoch": 0.86, "grad_norm": 0.6591884404022977, "learning_rate": 6.3900710246076055e-06, "loss": 0.5493, "step": 6719 }, { "epoch": 0.86, "grad_norm": 0.7622445422933825, "learning_rate": 6.389080077077467e-06, "loss": 0.6071, "step": 6720 }, { "epoch": 0.86, "grad_norm": 0.7749665447037803, "learning_rate": 6.3880890704186285e-06, "loss": 0.6113, "step": 6721 }, { "epoch": 0.86, "grad_norm": 0.5555647932447542, "learning_rate": 6.387098004673274e-06, "loss": 0.4722, "step": 6722 }, { "epoch": 0.86, "grad_norm": 0.591486054805839, "learning_rate": 6.386106879883589e-06, "loss": 0.5137, "step": 6723 }, { "epoch": 0.86, "grad_norm": 0.6986612083317825, "learning_rate": 6.385115696091763e-06, "loss": 0.5689, "step": 6724 }, { "epoch": 0.86, "grad_norm": 0.6413008031795211, "learning_rate": 6.384124453339988e-06, "loss": 0.5151, "step": 6725 }, { "epoch": 0.86, "grad_norm": 0.6807008739889637, "learning_rate": 6.38313315167046e-06, "loss": 0.5474, "step": 6726 }, { "epoch": 0.86, "grad_norm": 0.7190364538883304, "learning_rate": 6.382141791125371e-06, "loss": 0.6143, "step": 6727 }, { "epoch": 0.86, "grad_norm": 0.7138245656461645, "learning_rate": 6.381150371746925e-06, "loss": 0.5144, "step": 6728 }, { "epoch": 0.86, "grad_norm": 0.6731068278629442, "learning_rate": 6.380158893577321e-06, "loss": 0.539, "step": 6729 }, { "epoch": 0.86, "grad_norm": 0.6334266464840334, "learning_rate": 6.379167356658761e-06, "loss": 0.4887, "step": 6730 }, { "epoch": 0.86, "grad_norm": 0.6136014123301498, "learning_rate": 6.378175761033456e-06, "loss": 0.5422, "step": 6731 }, { "epoch": 0.86, "grad_norm": 0.5998200679599752, "learning_rate": 6.377184106743612e-06, "loss": 0.5262, "step": 6732 }, { "epoch": 0.86, "grad_norm": 0.5659747295383475, "learning_rate": 6.3761923938314415e-06, "loss": 0.5157, "step": 6733 }, { "epoch": 0.86, "grad_norm": 0.5649108934638767, "learning_rate": 6.375200622339159e-06, "loss": 0.4787, "step": 6734 }, { "epoch": 0.86, "grad_norm": 0.7574165968262838, "learning_rate": 6.37420879230898e-06, "loss": 0.5744, "step": 6735 }, { "epoch": 0.86, "grad_norm": 1.4388942882369706, "learning_rate": 6.3732169037831246e-06, "loss": 0.6236, "step": 6736 }, { "epoch": 0.86, "grad_norm": 0.9795215249888689, "learning_rate": 6.372224956803812e-06, "loss": 0.622, "step": 6737 }, { "epoch": 0.86, "grad_norm": 0.5670350333469152, "learning_rate": 6.37123295141327e-06, "loss": 0.5263, "step": 6738 }, { "epoch": 0.86, "grad_norm": 0.6845583822661014, "learning_rate": 6.3702408876537224e-06, "loss": 0.5792, "step": 6739 }, { "epoch": 0.86, "grad_norm": 0.6381552008092994, "learning_rate": 6.3692487655674e-06, "loss": 0.5252, "step": 6740 }, { "epoch": 0.86, "grad_norm": 0.5986972026279694, "learning_rate": 6.368256585196532e-06, "loss": 0.552, "step": 6741 }, { "epoch": 0.86, "grad_norm": 0.7205224659588964, "learning_rate": 6.3672643465833525e-06, "loss": 0.6018, "step": 6742 }, { "epoch": 0.86, "grad_norm": 0.8062345365126261, "learning_rate": 6.3662720497700996e-06, "loss": 0.5429, "step": 6743 }, { "epoch": 0.86, "grad_norm": 0.6414281985888026, "learning_rate": 6.365279694799012e-06, "loss": 0.5147, "step": 6744 }, { "epoch": 0.86, "grad_norm": 0.7556485437413981, "learning_rate": 6.364287281712331e-06, "loss": 0.5875, "step": 6745 }, { "epoch": 0.86, "grad_norm": 0.5826631493802104, "learning_rate": 6.3632948105522995e-06, "loss": 0.4515, "step": 6746 }, { "epoch": 0.86, "grad_norm": 0.7531103371860258, "learning_rate": 6.362302281361165e-06, "loss": 0.6538, "step": 6747 }, { "epoch": 0.86, "grad_norm": 0.7466964161900267, "learning_rate": 6.361309694181175e-06, "loss": 0.6341, "step": 6748 }, { "epoch": 0.86, "grad_norm": 0.5391393020601636, "learning_rate": 6.3603170490545805e-06, "loss": 0.5266, "step": 6749 }, { "epoch": 0.86, "grad_norm": 0.7314106997727251, "learning_rate": 6.3593243460236376e-06, "loss": 0.5822, "step": 6750 }, { "epoch": 0.86, "grad_norm": 0.7067591723768756, "learning_rate": 6.3583315851306e-06, "loss": 0.571, "step": 6751 }, { "epoch": 0.86, "grad_norm": 0.5537794363673365, "learning_rate": 6.357338766417729e-06, "loss": 0.5056, "step": 6752 }, { "epoch": 0.86, "grad_norm": 0.6287934355885714, "learning_rate": 6.356345889927284e-06, "loss": 0.5547, "step": 6753 }, { "epoch": 0.86, "grad_norm": 0.7113761522520146, "learning_rate": 6.355352955701528e-06, "loss": 0.6214, "step": 6754 }, { "epoch": 0.86, "grad_norm": 0.9750561780835462, "learning_rate": 6.354359963782726e-06, "loss": 0.619, "step": 6755 }, { "epoch": 0.86, "grad_norm": 0.8919735503043947, "learning_rate": 6.353366914213151e-06, "loss": 0.6053, "step": 6756 }, { "epoch": 0.86, "grad_norm": 0.709234512999577, "learning_rate": 6.3523738070350705e-06, "loss": 0.6305, "step": 6757 }, { "epoch": 0.86, "grad_norm": 0.8205006112398691, "learning_rate": 6.351380642290757e-06, "loss": 0.6353, "step": 6758 }, { "epoch": 0.86, "grad_norm": 0.5574928899055271, "learning_rate": 6.35038742002249e-06, "loss": 0.4891, "step": 6759 }, { "epoch": 0.86, "grad_norm": 0.5304699646174911, "learning_rate": 6.349394140272546e-06, "loss": 0.5112, "step": 6760 }, { "epoch": 0.86, "grad_norm": 0.7501973588898401, "learning_rate": 6.348400803083204e-06, "loss": 0.5445, "step": 6761 }, { "epoch": 0.86, "grad_norm": 0.9388017352297809, "learning_rate": 6.347407408496749e-06, "loss": 0.5875, "step": 6762 }, { "epoch": 0.86, "grad_norm": 0.7173405857868247, "learning_rate": 6.346413956555466e-06, "loss": 0.5842, "step": 6763 }, { "epoch": 0.86, "grad_norm": 0.8187707723663016, "learning_rate": 6.345420447301644e-06, "loss": 0.6533, "step": 6764 }, { "epoch": 0.86, "grad_norm": 0.7417909680826302, "learning_rate": 6.3444268807775736e-06, "loss": 0.5896, "step": 6765 }, { "epoch": 0.86, "grad_norm": 0.7642172112967541, "learning_rate": 6.3434332570255465e-06, "loss": 0.6128, "step": 6766 }, { "epoch": 0.86, "grad_norm": 0.6114815433787782, "learning_rate": 6.342439576087858e-06, "loss": 0.5052, "step": 6767 }, { "epoch": 0.86, "grad_norm": 1.0236714350210854, "learning_rate": 6.341445838006806e-06, "loss": 0.5985, "step": 6768 }, { "epoch": 0.86, "grad_norm": 0.6744256826388978, "learning_rate": 6.340452042824693e-06, "loss": 0.4624, "step": 6769 }, { "epoch": 0.86, "grad_norm": 0.7222292963664174, "learning_rate": 6.339458190583819e-06, "loss": 0.5547, "step": 6770 }, { "epoch": 0.86, "grad_norm": 0.7537838566144314, "learning_rate": 6.3384642813264904e-06, "loss": 0.6251, "step": 6771 }, { "epoch": 0.86, "grad_norm": 0.5984447339557778, "learning_rate": 6.337470315095016e-06, "loss": 0.5484, "step": 6772 }, { "epoch": 0.86, "grad_norm": 0.5093405123693988, "learning_rate": 6.336476291931702e-06, "loss": 0.4885, "step": 6773 }, { "epoch": 0.86, "grad_norm": 0.6065780791658569, "learning_rate": 6.3354822118788624e-06, "loss": 0.5449, "step": 6774 }, { "epoch": 0.86, "grad_norm": 0.7501777419214004, "learning_rate": 6.334488074978815e-06, "loss": 0.5172, "step": 6775 }, { "epoch": 0.86, "grad_norm": 0.7495723344250118, "learning_rate": 6.3334938812738734e-06, "loss": 0.5974, "step": 6776 }, { "epoch": 0.86, "grad_norm": 0.7150863817269848, "learning_rate": 6.332499630806359e-06, "loss": 0.5837, "step": 6777 }, { "epoch": 0.86, "grad_norm": 0.7851310025120265, "learning_rate": 6.3315053236185935e-06, "loss": 0.6754, "step": 6778 }, { "epoch": 0.86, "grad_norm": 0.7222751470063057, "learning_rate": 6.330510959752902e-06, "loss": 0.6121, "step": 6779 }, { "epoch": 0.86, "grad_norm": 0.9211548415234667, "learning_rate": 6.329516539251609e-06, "loss": 0.5665, "step": 6780 }, { "epoch": 0.86, "grad_norm": 0.5627713639444607, "learning_rate": 6.328522062157045e-06, "loss": 0.5454, "step": 6781 }, { "epoch": 0.86, "grad_norm": 0.8162151870739535, "learning_rate": 6.327527528511544e-06, "loss": 0.586, "step": 6782 }, { "epoch": 0.86, "grad_norm": 0.6706600262678507, "learning_rate": 6.326532938357438e-06, "loss": 0.554, "step": 6783 }, { "epoch": 0.86, "grad_norm": 0.7132656147409749, "learning_rate": 6.3255382917370634e-06, "loss": 0.6419, "step": 6784 }, { "epoch": 0.86, "grad_norm": 0.5587804142270424, "learning_rate": 6.32454358869276e-06, "loss": 0.5254, "step": 6785 }, { "epoch": 0.86, "grad_norm": 0.5622653100116567, "learning_rate": 6.323548829266867e-06, "loss": 0.5008, "step": 6786 }, { "epoch": 0.86, "grad_norm": 0.638376379275469, "learning_rate": 6.322554013501731e-06, "loss": 0.5835, "step": 6787 }, { "epoch": 0.86, "grad_norm": 0.6609193734697818, "learning_rate": 6.321559141439697e-06, "loss": 0.5525, "step": 6788 }, { "epoch": 0.86, "grad_norm": 0.6013839173079497, "learning_rate": 6.320564213123113e-06, "loss": 0.563, "step": 6789 }, { "epoch": 0.87, "grad_norm": 0.6008060519934829, "learning_rate": 6.319569228594331e-06, "loss": 0.4826, "step": 6790 }, { "epoch": 0.87, "grad_norm": 0.6721232126266182, "learning_rate": 6.318574187895703e-06, "loss": 0.5577, "step": 6791 }, { "epoch": 0.87, "grad_norm": 0.532686461543334, "learning_rate": 6.3175790910695845e-06, "loss": 0.5214, "step": 6792 }, { "epoch": 0.87, "grad_norm": 0.5937242019250427, "learning_rate": 6.316583938158336e-06, "loss": 0.4935, "step": 6793 }, { "epoch": 0.87, "grad_norm": 0.65146126396118, "learning_rate": 6.3155887292043164e-06, "loss": 0.495, "step": 6794 }, { "epoch": 0.87, "grad_norm": 0.7826559495731986, "learning_rate": 6.314593464249889e-06, "loss": 0.5865, "step": 6795 }, { "epoch": 0.87, "grad_norm": 0.6054753434688831, "learning_rate": 6.313598143337417e-06, "loss": 0.5177, "step": 6796 }, { "epoch": 0.87, "grad_norm": 0.7828603925191665, "learning_rate": 6.312602766509271e-06, "loss": 0.5594, "step": 6797 }, { "epoch": 0.87, "grad_norm": 0.5976344886995849, "learning_rate": 6.31160733380782e-06, "loss": 0.5399, "step": 6798 }, { "epoch": 0.87, "grad_norm": 0.7608514391926767, "learning_rate": 6.310611845275434e-06, "loss": 0.5393, "step": 6799 }, { "epoch": 0.87, "grad_norm": 0.7107898596213578, "learning_rate": 6.309616300954492e-06, "loss": 0.6108, "step": 6800 }, { "epoch": 0.87, "grad_norm": 0.608412541856247, "learning_rate": 6.308620700887368e-06, "loss": 0.5478, "step": 6801 }, { "epoch": 0.87, "grad_norm": 0.6038700595244411, "learning_rate": 6.307625045116443e-06, "loss": 0.5509, "step": 6802 }, { "epoch": 0.87, "grad_norm": 1.543431507757396, "learning_rate": 6.306629333684099e-06, "loss": 0.6488, "step": 6803 }, { "epoch": 0.87, "grad_norm": 0.6243108845268378, "learning_rate": 6.305633566632719e-06, "loss": 0.5401, "step": 6804 }, { "epoch": 0.87, "grad_norm": 0.8794502147054255, "learning_rate": 6.3046377440046895e-06, "loss": 0.6461, "step": 6805 }, { "epoch": 0.87, "grad_norm": 0.5867167302657583, "learning_rate": 6.3036418658424e-06, "loss": 0.5354, "step": 6806 }, { "epoch": 0.87, "grad_norm": 0.6031382569390703, "learning_rate": 6.3026459321882435e-06, "loss": 0.5559, "step": 6807 }, { "epoch": 0.87, "grad_norm": 0.5626801660294306, "learning_rate": 6.301649943084612e-06, "loss": 0.5391, "step": 6808 }, { "epoch": 0.87, "grad_norm": 0.7008523751937906, "learning_rate": 6.300653898573903e-06, "loss": 0.5719, "step": 6809 }, { "epoch": 0.87, "grad_norm": 0.6202315961834426, "learning_rate": 6.299657798698512e-06, "loss": 0.5435, "step": 6810 }, { "epoch": 0.87, "grad_norm": 0.7578252059528312, "learning_rate": 6.2986616435008415e-06, "loss": 0.6158, "step": 6811 }, { "epoch": 0.87, "grad_norm": 0.6815939664744214, "learning_rate": 6.297665433023295e-06, "loss": 0.554, "step": 6812 }, { "epoch": 0.87, "grad_norm": 0.7057340399865087, "learning_rate": 6.296669167308279e-06, "loss": 0.6013, "step": 6813 }, { "epoch": 0.87, "grad_norm": 0.6153740877570159, "learning_rate": 6.2956728463982e-06, "loss": 0.4992, "step": 6814 }, { "epoch": 0.87, "grad_norm": 0.5921791749086263, "learning_rate": 6.294676470335468e-06, "loss": 0.4803, "step": 6815 }, { "epoch": 0.87, "grad_norm": 0.7411634893068696, "learning_rate": 6.293680039162495e-06, "loss": 0.6038, "step": 6816 }, { "epoch": 0.87, "grad_norm": 0.6545407829797351, "learning_rate": 6.292683552921697e-06, "loss": 0.5949, "step": 6817 }, { "epoch": 0.87, "grad_norm": 0.7276433511793893, "learning_rate": 6.291687011655491e-06, "loss": 0.5784, "step": 6818 }, { "epoch": 0.87, "grad_norm": 0.7790355606637095, "learning_rate": 6.290690415406297e-06, "loss": 0.6273, "step": 6819 }, { "epoch": 0.87, "grad_norm": 0.8857327667942153, "learning_rate": 6.2896937642165354e-06, "loss": 0.5735, "step": 6820 }, { "epoch": 0.87, "grad_norm": 0.6715998134174386, "learning_rate": 6.2886970581286335e-06, "loss": 0.5292, "step": 6821 }, { "epoch": 0.87, "grad_norm": 0.7733025018650902, "learning_rate": 6.287700297185015e-06, "loss": 0.6041, "step": 6822 }, { "epoch": 0.87, "grad_norm": 0.7173777103567918, "learning_rate": 6.286703481428109e-06, "loss": 0.6402, "step": 6823 }, { "epoch": 0.87, "grad_norm": 0.642865696458957, "learning_rate": 6.2857066109003484e-06, "loss": 0.5225, "step": 6824 }, { "epoch": 0.87, "grad_norm": 0.7270170677998824, "learning_rate": 6.284709685644166e-06, "loss": 0.5884, "step": 6825 }, { "epoch": 0.87, "grad_norm": 0.5501905168697819, "learning_rate": 6.283712705701997e-06, "loss": 0.5299, "step": 6826 }, { "epoch": 0.87, "grad_norm": 0.5984007750484809, "learning_rate": 6.2827156711162805e-06, "loss": 0.5407, "step": 6827 }, { "epoch": 0.87, "grad_norm": 0.6576386282479347, "learning_rate": 6.281718581929457e-06, "loss": 0.5038, "step": 6828 }, { "epoch": 0.87, "grad_norm": 0.6924926115000757, "learning_rate": 6.280721438183969e-06, "loss": 0.5721, "step": 6829 }, { "epoch": 0.87, "grad_norm": 0.7452618188694167, "learning_rate": 6.279724239922261e-06, "loss": 0.5892, "step": 6830 }, { "epoch": 0.87, "grad_norm": 0.671576464309829, "learning_rate": 6.278726987186783e-06, "loss": 0.5177, "step": 6831 }, { "epoch": 0.87, "grad_norm": 0.7949311868949315, "learning_rate": 6.277729680019984e-06, "loss": 0.5861, "step": 6832 }, { "epoch": 0.87, "grad_norm": 0.6807662791089962, "learning_rate": 6.276732318464314e-06, "loss": 0.6374, "step": 6833 }, { "epoch": 0.87, "grad_norm": 0.6565947205416856, "learning_rate": 6.27573490256223e-06, "loss": 0.559, "step": 6834 }, { "epoch": 0.87, "grad_norm": 0.6133906144028313, "learning_rate": 6.274737432356187e-06, "loss": 0.5161, "step": 6835 }, { "epoch": 0.87, "grad_norm": 0.5959387865678492, "learning_rate": 6.273739907888645e-06, "loss": 0.5299, "step": 6836 }, { "epoch": 0.87, "grad_norm": 0.6739268664204052, "learning_rate": 6.272742329202066e-06, "loss": 0.561, "step": 6837 }, { "epoch": 0.87, "grad_norm": 0.9652026442339495, "learning_rate": 6.271744696338913e-06, "loss": 0.6284, "step": 6838 }, { "epoch": 0.87, "grad_norm": 0.6663780218655141, "learning_rate": 6.270747009341652e-06, "loss": 0.5483, "step": 6839 }, { "epoch": 0.87, "grad_norm": 0.6507891057327294, "learning_rate": 6.269749268252753e-06, "loss": 0.5399, "step": 6840 }, { "epoch": 0.87, "grad_norm": 0.6395465662584445, "learning_rate": 6.268751473114684e-06, "loss": 0.4978, "step": 6841 }, { "epoch": 0.87, "grad_norm": 0.7601071188929123, "learning_rate": 6.267753623969919e-06, "loss": 0.5551, "step": 6842 }, { "epoch": 0.87, "grad_norm": 0.8001905641435049, "learning_rate": 6.266755720860933e-06, "loss": 0.5876, "step": 6843 }, { "epoch": 0.87, "grad_norm": 0.7579055164094853, "learning_rate": 6.2657577638302045e-06, "loss": 0.5557, "step": 6844 }, { "epoch": 0.87, "grad_norm": 0.7746683846586531, "learning_rate": 6.2647597529202135e-06, "loss": 0.6439, "step": 6845 }, { "epoch": 0.87, "grad_norm": 0.9527593327937378, "learning_rate": 6.263761688173441e-06, "loss": 0.6052, "step": 6846 }, { "epoch": 0.87, "grad_norm": 0.5798480463925557, "learning_rate": 6.262763569632371e-06, "loss": 0.5342, "step": 6847 }, { "epoch": 0.87, "grad_norm": 0.7416176034640507, "learning_rate": 6.261765397339491e-06, "loss": 0.5626, "step": 6848 }, { "epoch": 0.87, "grad_norm": 0.6264930743913123, "learning_rate": 6.26076717133729e-06, "loss": 0.5739, "step": 6849 }, { "epoch": 0.87, "grad_norm": 0.8851478382258816, "learning_rate": 6.259768891668261e-06, "loss": 0.6162, "step": 6850 }, { "epoch": 0.87, "grad_norm": 0.8280457744640813, "learning_rate": 6.2587705583748945e-06, "loss": 0.5486, "step": 6851 }, { "epoch": 0.87, "grad_norm": 0.6002522558786202, "learning_rate": 6.257772171499687e-06, "loss": 0.5568, "step": 6852 }, { "epoch": 0.87, "grad_norm": 0.9150153674525723, "learning_rate": 6.256773731085139e-06, "loss": 0.6725, "step": 6853 }, { "epoch": 0.87, "grad_norm": 0.7827022016153788, "learning_rate": 6.255775237173749e-06, "loss": 0.6007, "step": 6854 }, { "epoch": 0.87, "grad_norm": 0.5676984860950057, "learning_rate": 6.25477668980802e-06, "loss": 0.5371, "step": 6855 }, { "epoch": 0.87, "grad_norm": 0.6547096461774186, "learning_rate": 6.2537780890304565e-06, "loss": 0.5605, "step": 6856 }, { "epoch": 0.87, "grad_norm": 0.623009421679491, "learning_rate": 6.2527794348835666e-06, "loss": 0.4892, "step": 6857 }, { "epoch": 0.87, "grad_norm": 0.543909343147493, "learning_rate": 6.251780727409861e-06, "loss": 0.512, "step": 6858 }, { "epoch": 0.87, "grad_norm": 0.6814961859316504, "learning_rate": 6.250781966651847e-06, "loss": 0.525, "step": 6859 }, { "epoch": 0.87, "grad_norm": 0.7807760097938817, "learning_rate": 6.249783152652045e-06, "loss": 0.5737, "step": 6860 }, { "epoch": 0.87, "grad_norm": 0.7379681688085484, "learning_rate": 6.248784285452964e-06, "loss": 0.5696, "step": 6861 }, { "epoch": 0.87, "grad_norm": 0.691373783756208, "learning_rate": 6.247785365097129e-06, "loss": 0.5181, "step": 6862 }, { "epoch": 0.87, "grad_norm": 0.6137904787440552, "learning_rate": 6.246786391627058e-06, "loss": 0.5563, "step": 6863 }, { "epoch": 0.87, "grad_norm": 0.5588059699715074, "learning_rate": 6.245787365085274e-06, "loss": 0.5149, "step": 6864 }, { "epoch": 0.87, "grad_norm": 0.7195083567222281, "learning_rate": 6.244788285514304e-06, "loss": 0.5248, "step": 6865 }, { "epoch": 0.87, "grad_norm": 0.6688699464853312, "learning_rate": 6.2437891529566745e-06, "loss": 0.6034, "step": 6866 }, { "epoch": 0.87, "grad_norm": 0.6131091843852191, "learning_rate": 6.242789967454913e-06, "loss": 0.5273, "step": 6867 }, { "epoch": 0.87, "grad_norm": 0.5423820291017064, "learning_rate": 6.241790729051555e-06, "loss": 0.4823, "step": 6868 }, { "epoch": 0.88, "grad_norm": 0.542137685061483, "learning_rate": 6.2407914377891355e-06, "loss": 0.5467, "step": 6869 }, { "epoch": 0.88, "grad_norm": 0.7002896528417133, "learning_rate": 6.239792093710189e-06, "loss": 0.6, "step": 6870 }, { "epoch": 0.88, "grad_norm": 0.8505814826815032, "learning_rate": 6.238792696857253e-06, "loss": 0.6131, "step": 6871 }, { "epoch": 0.88, "grad_norm": 0.5473859067486834, "learning_rate": 6.237793247272872e-06, "loss": 0.4759, "step": 6872 }, { "epoch": 0.88, "grad_norm": 0.899652362697501, "learning_rate": 6.236793744999587e-06, "loss": 0.6738, "step": 6873 }, { "epoch": 0.88, "grad_norm": 0.8467807264115054, "learning_rate": 6.2357941900799455e-06, "loss": 0.6142, "step": 6874 }, { "epoch": 0.88, "grad_norm": 0.6985128921024746, "learning_rate": 6.234794582556494e-06, "loss": 0.5553, "step": 6875 }, { "epoch": 0.88, "grad_norm": 0.7619096079649466, "learning_rate": 6.233794922471783e-06, "loss": 0.612, "step": 6876 }, { "epoch": 0.88, "grad_norm": 0.6663012159165116, "learning_rate": 6.232795209868365e-06, "loss": 0.522, "step": 6877 }, { "epoch": 0.88, "grad_norm": 0.7446578268848328, "learning_rate": 6.231795444788794e-06, "loss": 0.6283, "step": 6878 }, { "epoch": 0.88, "grad_norm": 2.5450036146527193, "learning_rate": 6.230795627275626e-06, "loss": 0.5677, "step": 6879 }, { "epoch": 0.88, "grad_norm": 0.758724870171053, "learning_rate": 6.229795757371423e-06, "loss": 0.5633, "step": 6880 }, { "epoch": 0.88, "grad_norm": 0.8175139850721489, "learning_rate": 6.228795835118744e-06, "loss": 0.6056, "step": 6881 }, { "epoch": 0.88, "grad_norm": 0.6643489122575102, "learning_rate": 6.227795860560153e-06, "loss": 0.5483, "step": 6882 }, { "epoch": 0.88, "grad_norm": 0.7445691382149575, "learning_rate": 6.226795833738216e-06, "loss": 0.6273, "step": 6883 }, { "epoch": 0.88, "grad_norm": 0.5875772040990337, "learning_rate": 6.225795754695501e-06, "loss": 0.5116, "step": 6884 }, { "epoch": 0.88, "grad_norm": 0.9325925337166909, "learning_rate": 6.224795623474576e-06, "loss": 0.6087, "step": 6885 }, { "epoch": 0.88, "grad_norm": 0.5917189443119621, "learning_rate": 6.223795440118015e-06, "loss": 0.5172, "step": 6886 }, { "epoch": 0.88, "grad_norm": 0.6571296009952919, "learning_rate": 6.2227952046683945e-06, "loss": 0.5163, "step": 6887 }, { "epoch": 0.88, "grad_norm": 0.8671543582304182, "learning_rate": 6.22179491716829e-06, "loss": 0.637, "step": 6888 }, { "epoch": 0.88, "grad_norm": 0.599192086806491, "learning_rate": 6.22079457766028e-06, "loss": 0.5182, "step": 6889 }, { "epoch": 0.88, "grad_norm": 0.7168495563588103, "learning_rate": 6.219794186186945e-06, "loss": 0.6026, "step": 6890 }, { "epoch": 0.88, "grad_norm": 0.6995747763004064, "learning_rate": 6.21879374279087e-06, "loss": 0.5996, "step": 6891 }, { "epoch": 0.88, "grad_norm": 0.6169746328907965, "learning_rate": 6.217793247514638e-06, "loss": 0.513, "step": 6892 }, { "epoch": 0.88, "grad_norm": 1.2195289350244276, "learning_rate": 6.216792700400841e-06, "loss": 0.6221, "step": 6893 }, { "epoch": 0.88, "grad_norm": 0.749395846257989, "learning_rate": 6.215792101492068e-06, "loss": 0.6589, "step": 6894 }, { "epoch": 0.88, "grad_norm": 0.6770192456971336, "learning_rate": 6.214791450830908e-06, "loss": 0.5235, "step": 6895 }, { "epoch": 0.88, "grad_norm": 0.5865526980333283, "learning_rate": 6.2137907484599605e-06, "loss": 0.5403, "step": 6896 }, { "epoch": 0.88, "grad_norm": 0.8193177300012351, "learning_rate": 6.212789994421817e-06, "loss": 0.6289, "step": 6897 }, { "epoch": 0.88, "grad_norm": 0.9135725602561945, "learning_rate": 6.21178918875908e-06, "loss": 0.5734, "step": 6898 }, { "epoch": 0.88, "grad_norm": 1.0049178283070277, "learning_rate": 6.210788331514349e-06, "loss": 0.6387, "step": 6899 }, { "epoch": 0.88, "grad_norm": 0.8394995515526554, "learning_rate": 6.209787422730229e-06, "loss": 0.6422, "step": 6900 }, { "epoch": 0.88, "grad_norm": 0.7368126956076453, "learning_rate": 6.208786462449323e-06, "loss": 0.6024, "step": 6901 }, { "epoch": 0.88, "grad_norm": 0.8384003347264457, "learning_rate": 6.207785450714242e-06, "loss": 0.6018, "step": 6902 }, { "epoch": 0.88, "grad_norm": 0.7074314150809423, "learning_rate": 6.206784387567592e-06, "loss": 0.5318, "step": 6903 }, { "epoch": 0.88, "grad_norm": 0.5627536859957167, "learning_rate": 6.205783273051988e-06, "loss": 0.5226, "step": 6904 }, { "epoch": 0.88, "grad_norm": 0.6223252024920127, "learning_rate": 6.204782107210044e-06, "loss": 0.5519, "step": 6905 }, { "epoch": 0.88, "grad_norm": 0.6013096013428314, "learning_rate": 6.203780890084374e-06, "loss": 0.4471, "step": 6906 }, { "epoch": 0.88, "grad_norm": 0.6376797279349884, "learning_rate": 6.2027796217175985e-06, "loss": 0.5521, "step": 6907 }, { "epoch": 0.88, "grad_norm": 0.9504777812926263, "learning_rate": 6.20177830215234e-06, "loss": 0.6301, "step": 6908 }, { "epoch": 0.88, "grad_norm": 0.957302861659713, "learning_rate": 6.200776931431216e-06, "loss": 0.6086, "step": 6909 }, { "epoch": 0.88, "grad_norm": 0.9441578880772518, "learning_rate": 6.199775509596857e-06, "loss": 0.655, "step": 6910 }, { "epoch": 0.88, "grad_norm": 0.7527162940865, "learning_rate": 6.198774036691888e-06, "loss": 0.5941, "step": 6911 }, { "epoch": 0.88, "grad_norm": 0.5929544365399939, "learning_rate": 6.197772512758939e-06, "loss": 0.512, "step": 6912 }, { "epoch": 0.88, "grad_norm": 0.5774386893457697, "learning_rate": 6.1967709378406425e-06, "loss": 0.5229, "step": 6913 }, { "epoch": 0.88, "grad_norm": 0.7774892969808319, "learning_rate": 6.195769311979631e-06, "loss": 0.6513, "step": 6914 }, { "epoch": 0.88, "grad_norm": 0.8135022353073249, "learning_rate": 6.194767635218541e-06, "loss": 0.6416, "step": 6915 }, { "epoch": 0.88, "grad_norm": 0.8896958118115137, "learning_rate": 6.193765907600011e-06, "loss": 0.5814, "step": 6916 }, { "epoch": 0.88, "grad_norm": 0.628869665013241, "learning_rate": 6.192764129166681e-06, "loss": 0.5466, "step": 6917 }, { "epoch": 0.88, "grad_norm": 0.5756931612956352, "learning_rate": 6.191762299961194e-06, "loss": 0.5114, "step": 6918 }, { "epoch": 0.88, "grad_norm": 0.5808018003374665, "learning_rate": 6.190760420026193e-06, "loss": 0.524, "step": 6919 }, { "epoch": 0.88, "grad_norm": 0.6915554516000589, "learning_rate": 6.189758489404327e-06, "loss": 0.5687, "step": 6920 }, { "epoch": 0.88, "grad_norm": 0.636161627714552, "learning_rate": 6.1887565081382435e-06, "loss": 0.6299, "step": 6921 }, { "epoch": 0.88, "grad_norm": 0.6774435912803495, "learning_rate": 6.187754476270595e-06, "loss": 0.5514, "step": 6922 }, { "epoch": 0.88, "grad_norm": 0.8193920191532753, "learning_rate": 6.186752393844032e-06, "loss": 0.5995, "step": 6923 }, { "epoch": 0.88, "grad_norm": 1.0017311733553744, "learning_rate": 6.185750260901213e-06, "loss": 0.5734, "step": 6924 }, { "epoch": 0.88, "grad_norm": 0.9726053865363808, "learning_rate": 6.184748077484796e-06, "loss": 0.6573, "step": 6925 }, { "epoch": 0.88, "grad_norm": 0.7189541834067743, "learning_rate": 6.183745843637437e-06, "loss": 0.594, "step": 6926 }, { "epoch": 0.88, "grad_norm": 0.7879463764308462, "learning_rate": 6.182743559401801e-06, "loss": 0.664, "step": 6927 }, { "epoch": 0.88, "grad_norm": 0.6473155588635416, "learning_rate": 6.181741224820552e-06, "loss": 0.4793, "step": 6928 }, { "epoch": 0.88, "grad_norm": 0.7817179856107083, "learning_rate": 6.180738839936354e-06, "loss": 0.5041, "step": 6929 }, { "epoch": 0.88, "grad_norm": 0.5812415751280942, "learning_rate": 6.179736404791877e-06, "loss": 0.5268, "step": 6930 }, { "epoch": 0.88, "grad_norm": 0.6793739797508815, "learning_rate": 6.178733919429793e-06, "loss": 0.5722, "step": 6931 }, { "epoch": 0.88, "grad_norm": 0.6494114722260511, "learning_rate": 6.177731383892771e-06, "loss": 0.5401, "step": 6932 }, { "epoch": 0.88, "grad_norm": 0.5785418031395532, "learning_rate": 6.176728798223488e-06, "loss": 0.5074, "step": 6933 }, { "epoch": 0.88, "grad_norm": 0.5861779976293513, "learning_rate": 6.1757261624646194e-06, "loss": 0.5502, "step": 6934 }, { "epoch": 0.88, "grad_norm": 0.6706083698895646, "learning_rate": 6.174723476658845e-06, "loss": 0.5165, "step": 6935 }, { "epoch": 0.88, "grad_norm": 0.6137462044331675, "learning_rate": 6.1737207408488474e-06, "loss": 0.539, "step": 6936 }, { "epoch": 0.88, "grad_norm": 0.6274706029119311, "learning_rate": 6.172717955077309e-06, "loss": 0.5049, "step": 6937 }, { "epoch": 0.88, "grad_norm": 0.6239881438203184, "learning_rate": 6.171715119386913e-06, "loss": 0.5485, "step": 6938 }, { "epoch": 0.88, "grad_norm": 0.8374711885935975, "learning_rate": 6.1707122338203496e-06, "loss": 0.576, "step": 6939 }, { "epoch": 0.88, "grad_norm": 0.778003576136719, "learning_rate": 6.169709298420308e-06, "loss": 0.6208, "step": 6940 }, { "epoch": 0.88, "grad_norm": 0.6405126665539604, "learning_rate": 6.1687063132294776e-06, "loss": 0.5289, "step": 6941 }, { "epoch": 0.88, "grad_norm": 0.7634993817468099, "learning_rate": 6.167703278290556e-06, "loss": 0.6004, "step": 6942 }, { "epoch": 0.88, "grad_norm": 0.9057512115608788, "learning_rate": 6.166700193646235e-06, "loss": 0.653, "step": 6943 }, { "epoch": 0.88, "grad_norm": 0.5547006559169239, "learning_rate": 6.165697059339218e-06, "loss": 0.5182, "step": 6944 }, { "epoch": 0.88, "grad_norm": 0.5080745747688111, "learning_rate": 6.1646938754122e-06, "loss": 0.455, "step": 6945 }, { "epoch": 0.88, "grad_norm": 0.5615632718437535, "learning_rate": 6.1636906419078865e-06, "loss": 0.4883, "step": 6946 }, { "epoch": 0.89, "grad_norm": 0.6593833915263615, "learning_rate": 6.162687358868979e-06, "loss": 0.5638, "step": 6947 }, { "epoch": 0.89, "grad_norm": 0.8758431849895096, "learning_rate": 6.161684026338188e-06, "loss": 0.5644, "step": 6948 }, { "epoch": 0.89, "grad_norm": 0.7714982101403007, "learning_rate": 6.160680644358221e-06, "loss": 0.6517, "step": 6949 }, { "epoch": 0.89, "grad_norm": 0.7229073069281465, "learning_rate": 6.159677212971788e-06, "loss": 0.5827, "step": 6950 }, { "epoch": 0.89, "grad_norm": 0.7170060430273959, "learning_rate": 6.158673732221601e-06, "loss": 0.6252, "step": 6951 }, { "epoch": 0.89, "grad_norm": 0.6044232803295256, "learning_rate": 6.157670202150374e-06, "loss": 0.5583, "step": 6952 }, { "epoch": 0.89, "grad_norm": 0.6175731398380181, "learning_rate": 6.156666622800829e-06, "loss": 0.4854, "step": 6953 }, { "epoch": 0.89, "grad_norm": 0.726168022342418, "learning_rate": 6.155662994215679e-06, "loss": 0.6317, "step": 6954 }, { "epoch": 0.89, "grad_norm": 1.1010979581015248, "learning_rate": 6.15465931643765e-06, "loss": 0.7149, "step": 6955 }, { "epoch": 0.89, "grad_norm": 0.5951092840843372, "learning_rate": 6.153655589509464e-06, "loss": 0.4564, "step": 6956 }, { "epoch": 0.89, "grad_norm": 0.7322830194642305, "learning_rate": 6.1526518134738455e-06, "loss": 0.6375, "step": 6957 }, { "epoch": 0.89, "grad_norm": 0.7118970209441388, "learning_rate": 6.1516479883735234e-06, "loss": 0.5813, "step": 6958 }, { "epoch": 0.89, "grad_norm": 0.6104637232344424, "learning_rate": 6.150644114251226e-06, "loss": 0.5411, "step": 6959 }, { "epoch": 0.89, "grad_norm": 0.613150555109829, "learning_rate": 6.149640191149684e-06, "loss": 0.5812, "step": 6960 }, { "epoch": 0.89, "grad_norm": 0.5887407967297529, "learning_rate": 6.148636219111635e-06, "loss": 0.5466, "step": 6961 }, { "epoch": 0.89, "grad_norm": 0.6929918455206895, "learning_rate": 6.147632198179813e-06, "loss": 0.5655, "step": 6962 }, { "epoch": 0.89, "grad_norm": 0.6886215737073035, "learning_rate": 6.1466281283969545e-06, "loss": 0.5481, "step": 6963 }, { "epoch": 0.89, "grad_norm": 0.6728231630783966, "learning_rate": 6.1456240098058005e-06, "loss": 0.5506, "step": 6964 }, { "epoch": 0.89, "grad_norm": 0.72738416349824, "learning_rate": 6.144619842449094e-06, "loss": 0.5739, "step": 6965 }, { "epoch": 0.89, "grad_norm": 0.6921865149606771, "learning_rate": 6.143615626369578e-06, "loss": 0.5789, "step": 6966 }, { "epoch": 0.89, "grad_norm": 0.7517713041007731, "learning_rate": 6.1426113616099995e-06, "loss": 0.603, "step": 6967 }, { "epoch": 0.89, "grad_norm": 0.7732631404374873, "learning_rate": 6.141607048213107e-06, "loss": 0.6654, "step": 6968 }, { "epoch": 0.89, "grad_norm": 0.8065041814625559, "learning_rate": 6.14060268622165e-06, "loss": 0.4992, "step": 6969 }, { "epoch": 0.89, "grad_norm": 0.7775832453012291, "learning_rate": 6.139598275678381e-06, "loss": 0.6143, "step": 6970 }, { "epoch": 0.89, "grad_norm": 0.8101687830242854, "learning_rate": 6.138593816626055e-06, "loss": 0.5874, "step": 6971 }, { "epoch": 0.89, "grad_norm": 0.7719784037915997, "learning_rate": 6.137589309107429e-06, "loss": 0.5557, "step": 6972 }, { "epoch": 0.89, "grad_norm": 0.6989662084373551, "learning_rate": 6.136584753165262e-06, "loss": 0.5359, "step": 6973 }, { "epoch": 0.89, "grad_norm": 0.663647152962222, "learning_rate": 6.135580148842314e-06, "loss": 0.5134, "step": 6974 }, { "epoch": 0.89, "grad_norm": 0.7800516951878073, "learning_rate": 6.134575496181348e-06, "loss": 0.6692, "step": 6975 }, { "epoch": 0.89, "grad_norm": 0.6679711901363279, "learning_rate": 6.133570795225128e-06, "loss": 0.4585, "step": 6976 }, { "epoch": 0.89, "grad_norm": 0.6182815652001041, "learning_rate": 6.132566046016422e-06, "loss": 0.5263, "step": 6977 }, { "epoch": 0.89, "grad_norm": 0.5417296473212366, "learning_rate": 6.131561248597997e-06, "loss": 0.5297, "step": 6978 }, { "epoch": 0.89, "grad_norm": 0.6054942574934512, "learning_rate": 6.1305564030126264e-06, "loss": 0.5422, "step": 6979 }, { "epoch": 0.89, "grad_norm": 0.6145197552505882, "learning_rate": 6.1295515093030835e-06, "loss": 0.5241, "step": 6980 }, { "epoch": 0.89, "grad_norm": 0.6276337869361472, "learning_rate": 6.1285465675121414e-06, "loss": 0.541, "step": 6981 }, { "epoch": 0.89, "grad_norm": 0.6780973649138582, "learning_rate": 6.1275415776825795e-06, "loss": 0.5792, "step": 6982 }, { "epoch": 0.89, "grad_norm": 0.6101682433886345, "learning_rate": 6.1265365398571755e-06, "loss": 0.5689, "step": 6983 }, { "epoch": 0.89, "grad_norm": 0.7210033167807183, "learning_rate": 6.1255314540787105e-06, "loss": 0.583, "step": 6984 }, { "epoch": 0.89, "grad_norm": 0.7737271743546085, "learning_rate": 6.1245263203899665e-06, "loss": 0.6431, "step": 6985 }, { "epoch": 0.89, "grad_norm": 0.7812190187635045, "learning_rate": 6.123521138833732e-06, "loss": 0.5542, "step": 6986 }, { "epoch": 0.89, "grad_norm": 0.5628385126075388, "learning_rate": 6.122515909452793e-06, "loss": 0.536, "step": 6987 }, { "epoch": 0.89, "grad_norm": 0.6866876473192836, "learning_rate": 6.121510632289939e-06, "loss": 0.5375, "step": 6988 }, { "epoch": 0.89, "grad_norm": 0.8312761759667844, "learning_rate": 6.1205053073879605e-06, "loss": 0.6062, "step": 6989 }, { "epoch": 0.89, "grad_norm": 0.5482930227360178, "learning_rate": 6.119499934789654e-06, "loss": 0.4728, "step": 6990 }, { "epoch": 0.89, "grad_norm": 0.6022037195005886, "learning_rate": 6.118494514537809e-06, "loss": 0.5124, "step": 6991 }, { "epoch": 0.89, "grad_norm": 0.6006512478769933, "learning_rate": 6.117489046675229e-06, "loss": 0.5193, "step": 6992 }, { "epoch": 0.89, "grad_norm": 0.6906369730060452, "learning_rate": 6.11648353124471e-06, "loss": 0.5501, "step": 6993 }, { "epoch": 0.89, "grad_norm": 0.7418386740062759, "learning_rate": 6.115477968289057e-06, "loss": 0.553, "step": 6994 }, { "epoch": 0.89, "grad_norm": 0.6265433495134833, "learning_rate": 6.114472357851069e-06, "loss": 0.53, "step": 6995 }, { "epoch": 0.89, "grad_norm": 0.6038562518554125, "learning_rate": 6.1134666999735555e-06, "loss": 0.5465, "step": 6996 }, { "epoch": 0.89, "grad_norm": 0.7887925635533009, "learning_rate": 6.1124609946993215e-06, "loss": 0.6539, "step": 6997 }, { "epoch": 0.89, "grad_norm": 0.7531312921636764, "learning_rate": 6.1114552420711795e-06, "loss": 0.5588, "step": 6998 }, { "epoch": 0.89, "grad_norm": 0.786681441606181, "learning_rate": 6.1104494421319385e-06, "loss": 0.6205, "step": 6999 }, { "epoch": 0.89, "grad_norm": 0.6395401569832504, "learning_rate": 6.1094435949244135e-06, "loss": 0.5425, "step": 7000 }, { "epoch": 0.89, "grad_norm": 0.6192397084103954, "learning_rate": 6.1084377004914195e-06, "loss": 0.5775, "step": 7001 }, { "epoch": 0.89, "grad_norm": 0.6416149970841349, "learning_rate": 6.107431758875776e-06, "loss": 0.5446, "step": 7002 }, { "epoch": 0.89, "grad_norm": 0.6091182591298585, "learning_rate": 6.106425770120299e-06, "loss": 0.5454, "step": 7003 }, { "epoch": 0.89, "grad_norm": 0.6439199776065763, "learning_rate": 6.105419734267815e-06, "loss": 0.514, "step": 7004 }, { "epoch": 0.89, "grad_norm": 1.0701801205887538, "learning_rate": 6.104413651361144e-06, "loss": 0.5988, "step": 7005 }, { "epoch": 0.89, "grad_norm": 0.832089162578337, "learning_rate": 6.103407521443115e-06, "loss": 0.5974, "step": 7006 }, { "epoch": 0.89, "grad_norm": 0.8201899620076895, "learning_rate": 6.1024013445565515e-06, "loss": 0.6743, "step": 7007 }, { "epoch": 0.89, "grad_norm": 0.7319994524681279, "learning_rate": 6.101395120744286e-06, "loss": 0.5104, "step": 7008 }, { "epoch": 0.89, "grad_norm": 0.5550222837796425, "learning_rate": 6.100388850049149e-06, "loss": 0.5644, "step": 7009 }, { "epoch": 0.89, "grad_norm": 0.5438093201350016, "learning_rate": 6.099382532513976e-06, "loss": 0.5014, "step": 7010 }, { "epoch": 0.89, "grad_norm": 0.715979102804903, "learning_rate": 6.098376168181602e-06, "loss": 0.6365, "step": 7011 }, { "epoch": 0.89, "grad_norm": 0.6219142752938432, "learning_rate": 6.097369757094864e-06, "loss": 0.5332, "step": 7012 }, { "epoch": 0.89, "grad_norm": 0.5800098704411566, "learning_rate": 6.0963632992966036e-06, "loss": 0.5211, "step": 7013 }, { "epoch": 0.89, "grad_norm": 0.6475692842033535, "learning_rate": 6.09535679482966e-06, "loss": 0.5382, "step": 7014 }, { "epoch": 0.89, "grad_norm": 0.6349157139075965, "learning_rate": 6.094350243736878e-06, "loss": 0.4815, "step": 7015 }, { "epoch": 0.89, "grad_norm": 0.9500280625936495, "learning_rate": 6.093343646061101e-06, "loss": 0.6404, "step": 7016 }, { "epoch": 0.89, "grad_norm": 0.5721372743526819, "learning_rate": 6.092337001845181e-06, "loss": 0.5161, "step": 7017 }, { "epoch": 0.89, "grad_norm": 0.9799732367864387, "learning_rate": 6.091330311131965e-06, "loss": 0.5629, "step": 7018 }, { "epoch": 0.89, "grad_norm": 0.5914921112517635, "learning_rate": 6.090323573964305e-06, "loss": 0.5108, "step": 7019 }, { "epoch": 0.89, "grad_norm": 0.5518055965519464, "learning_rate": 6.089316790385056e-06, "loss": 0.4329, "step": 7020 }, { "epoch": 0.89, "grad_norm": 0.8094592049857515, "learning_rate": 6.088309960437071e-06, "loss": 0.6236, "step": 7021 }, { "epoch": 0.89, "grad_norm": 0.7519590278852772, "learning_rate": 6.087303084163207e-06, "loss": 0.5772, "step": 7022 }, { "epoch": 0.89, "grad_norm": 0.6435099144700268, "learning_rate": 6.086296161606328e-06, "loss": 0.504, "step": 7023 }, { "epoch": 0.89, "grad_norm": 0.7090464668951559, "learning_rate": 6.085289192809291e-06, "loss": 0.6317, "step": 7024 }, { "epoch": 0.89, "grad_norm": 0.5962875285809118, "learning_rate": 6.084282177814962e-06, "loss": 0.5077, "step": 7025 }, { "epoch": 0.9, "grad_norm": 0.722109036630549, "learning_rate": 6.083275116666206e-06, "loss": 0.5609, "step": 7026 }, { "epoch": 0.9, "grad_norm": 0.7958360727980037, "learning_rate": 6.0822680094058885e-06, "loss": 0.5995, "step": 7027 }, { "epoch": 0.9, "grad_norm": 0.5849184702604707, "learning_rate": 6.081260856076882e-06, "loss": 0.4885, "step": 7028 }, { "epoch": 0.9, "grad_norm": 0.5601944460241765, "learning_rate": 6.0802536567220546e-06, "loss": 0.4801, "step": 7029 }, { "epoch": 0.9, "grad_norm": 0.8724346766325471, "learning_rate": 6.079246411384282e-06, "loss": 0.624, "step": 7030 }, { "epoch": 0.9, "grad_norm": 0.6521322783295203, "learning_rate": 6.0782391201064374e-06, "loss": 0.4872, "step": 7031 }, { "epoch": 0.9, "grad_norm": 0.7911287449441682, "learning_rate": 6.0772317829313994e-06, "loss": 0.6163, "step": 7032 }, { "epoch": 0.9, "grad_norm": 0.9396882391719757, "learning_rate": 6.0762243999020465e-06, "loss": 0.5203, "step": 7033 }, { "epoch": 0.9, "grad_norm": 0.5714283129024395, "learning_rate": 6.0752169710612605e-06, "loss": 0.5076, "step": 7034 }, { "epoch": 0.9, "grad_norm": 0.7593724104306929, "learning_rate": 6.074209496451924e-06, "loss": 0.6125, "step": 7035 }, { "epoch": 0.9, "grad_norm": 0.6438712476384332, "learning_rate": 6.073201976116923e-06, "loss": 0.5102, "step": 7036 }, { "epoch": 0.9, "grad_norm": 0.5416491414306279, "learning_rate": 6.072194410099142e-06, "loss": 0.4788, "step": 7037 }, { "epoch": 0.9, "grad_norm": 0.7547145890173839, "learning_rate": 6.07118679844147e-06, "loss": 0.5941, "step": 7038 }, { "epoch": 0.9, "grad_norm": 0.6843022383752474, "learning_rate": 6.070179141186802e-06, "loss": 0.5519, "step": 7039 }, { "epoch": 0.9, "grad_norm": 0.6489889671240688, "learning_rate": 6.069171438378025e-06, "loss": 0.5307, "step": 7040 }, { "epoch": 0.9, "grad_norm": 0.6018158642552033, "learning_rate": 6.068163690058038e-06, "loss": 0.4985, "step": 7041 }, { "epoch": 0.9, "grad_norm": 0.6810515386416672, "learning_rate": 6.067155896269735e-06, "loss": 0.5313, "step": 7042 }, { "epoch": 0.9, "grad_norm": 0.6795412536103733, "learning_rate": 6.066148057056017e-06, "loss": 0.5429, "step": 7043 }, { "epoch": 0.9, "grad_norm": 0.5638607829429544, "learning_rate": 6.065140172459782e-06, "loss": 0.4877, "step": 7044 }, { "epoch": 0.9, "grad_norm": 0.6394401456542274, "learning_rate": 6.064132242523935e-06, "loss": 0.5457, "step": 7045 }, { "epoch": 0.9, "grad_norm": 0.5914186049789284, "learning_rate": 6.063124267291378e-06, "loss": 0.525, "step": 7046 }, { "epoch": 0.9, "grad_norm": 0.794023482227983, "learning_rate": 6.0621162468050165e-06, "loss": 0.5801, "step": 7047 }, { "epoch": 0.9, "grad_norm": 0.632927484052232, "learning_rate": 6.061108181107762e-06, "loss": 0.5272, "step": 7048 }, { "epoch": 0.9, "grad_norm": 0.8562898361911961, "learning_rate": 6.060100070242524e-06, "loss": 0.6606, "step": 7049 }, { "epoch": 0.9, "grad_norm": 0.6690535977493315, "learning_rate": 6.059091914252213e-06, "loss": 0.5403, "step": 7050 }, { "epoch": 0.9, "grad_norm": 0.519046681801265, "learning_rate": 6.058083713179743e-06, "loss": 0.4995, "step": 7051 }, { "epoch": 0.9, "grad_norm": 0.6134678210055258, "learning_rate": 6.057075467068031e-06, "loss": 0.5696, "step": 7052 }, { "epoch": 0.9, "grad_norm": 0.7754917113503742, "learning_rate": 6.056067175959993e-06, "loss": 0.5921, "step": 7053 }, { "epoch": 0.9, "grad_norm": 0.527923556579244, "learning_rate": 6.055058839898551e-06, "loss": 0.4658, "step": 7054 }, { "epoch": 0.9, "grad_norm": 0.7248729441933248, "learning_rate": 6.054050458926626e-06, "loss": 0.5888, "step": 7055 }, { "epoch": 0.9, "grad_norm": 0.8296417343177384, "learning_rate": 6.053042033087141e-06, "loss": 0.6083, "step": 7056 }, { "epoch": 0.9, "grad_norm": 0.6675126820356825, "learning_rate": 6.052033562423022e-06, "loss": 0.56, "step": 7057 }, { "epoch": 0.9, "grad_norm": 0.668743435739018, "learning_rate": 6.051025046977196e-06, "loss": 0.5379, "step": 7058 }, { "epoch": 0.9, "grad_norm": 0.7949294816777245, "learning_rate": 6.050016486792591e-06, "loss": 0.6224, "step": 7059 }, { "epoch": 0.9, "grad_norm": 0.6205793233071754, "learning_rate": 6.049007881912141e-06, "loss": 0.5338, "step": 7060 }, { "epoch": 0.9, "grad_norm": 0.6408703407888654, "learning_rate": 6.047999232378777e-06, "loss": 0.5325, "step": 7061 }, { "epoch": 0.9, "grad_norm": 0.729530647059725, "learning_rate": 6.046990538235435e-06, "loss": 0.5263, "step": 7062 }, { "epoch": 0.9, "grad_norm": 0.8706779640023593, "learning_rate": 6.045981799525051e-06, "loss": 0.6181, "step": 7063 }, { "epoch": 0.9, "grad_norm": 0.5438266929594107, "learning_rate": 6.0449730162905654e-06, "loss": 0.4777, "step": 7064 }, { "epoch": 0.9, "grad_norm": 0.7359860876038248, "learning_rate": 6.043964188574915e-06, "loss": 0.5942, "step": 7065 }, { "epoch": 0.9, "grad_norm": 0.7241323835662788, "learning_rate": 6.042955316421049e-06, "loss": 0.6209, "step": 7066 }, { "epoch": 0.9, "grad_norm": 0.8035127333265772, "learning_rate": 6.041946399871905e-06, "loss": 0.5655, "step": 7067 }, { "epoch": 0.9, "grad_norm": 0.6956269386666362, "learning_rate": 6.0409374389704335e-06, "loss": 0.5964, "step": 7068 }, { "epoch": 0.9, "grad_norm": 0.6660078947711036, "learning_rate": 6.039928433759582e-06, "loss": 0.5378, "step": 7069 }, { "epoch": 0.9, "grad_norm": 0.589503771019717, "learning_rate": 6.0389193842823e-06, "loss": 0.5409, "step": 7070 }, { "epoch": 0.9, "grad_norm": 0.5866593047121362, "learning_rate": 6.037910290581538e-06, "loss": 0.4917, "step": 7071 }, { "epoch": 0.9, "grad_norm": 0.6604860005466706, "learning_rate": 6.036901152700253e-06, "loss": 0.5242, "step": 7072 }, { "epoch": 0.9, "grad_norm": 0.5987571440606564, "learning_rate": 6.0358919706814e-06, "loss": 0.5085, "step": 7073 }, { "epoch": 0.9, "grad_norm": 0.6245365173020011, "learning_rate": 6.034882744567936e-06, "loss": 0.4873, "step": 7074 }, { "epoch": 0.9, "grad_norm": 0.6262500080672581, "learning_rate": 6.033873474402819e-06, "loss": 0.4865, "step": 7075 }, { "epoch": 0.9, "grad_norm": 0.6391414222372909, "learning_rate": 6.032864160229014e-06, "loss": 0.5861, "step": 7076 }, { "epoch": 0.9, "grad_norm": 0.6128183206857968, "learning_rate": 6.0318548020894805e-06, "loss": 0.5292, "step": 7077 }, { "epoch": 0.9, "grad_norm": 0.5810153720405562, "learning_rate": 6.030845400027186e-06, "loss": 0.553, "step": 7078 }, { "epoch": 0.9, "grad_norm": 0.7575050037271156, "learning_rate": 6.029835954085097e-06, "loss": 0.6126, "step": 7079 }, { "epoch": 0.9, "grad_norm": 0.7596357862222697, "learning_rate": 6.028826464306183e-06, "loss": 0.6089, "step": 7080 }, { "epoch": 0.9, "grad_norm": 0.5339339948938192, "learning_rate": 6.027816930733413e-06, "loss": 0.4795, "step": 7081 }, { "epoch": 0.9, "grad_norm": 0.8948840595780623, "learning_rate": 6.026807353409762e-06, "loss": 0.6023, "step": 7082 }, { "epoch": 0.9, "grad_norm": 0.732670892906231, "learning_rate": 6.0257977323782025e-06, "loss": 0.6055, "step": 7083 }, { "epoch": 0.9, "grad_norm": 0.6642119407639585, "learning_rate": 6.02478806768171e-06, "loss": 0.5293, "step": 7084 }, { "epoch": 0.9, "grad_norm": 0.5666997521276964, "learning_rate": 6.023778359363266e-06, "loss": 0.4599, "step": 7085 }, { "epoch": 0.9, "grad_norm": 0.8067941820503703, "learning_rate": 6.022768607465849e-06, "loss": 0.6445, "step": 7086 }, { "epoch": 0.9, "grad_norm": 0.8362104587225783, "learning_rate": 6.02175881203244e-06, "loss": 0.6197, "step": 7087 }, { "epoch": 0.9, "grad_norm": 0.9487357113369056, "learning_rate": 6.0207489731060234e-06, "loss": 0.6509, "step": 7088 }, { "epoch": 0.9, "grad_norm": 0.6394645143540107, "learning_rate": 6.019739090729585e-06, "loss": 0.5156, "step": 7089 }, { "epoch": 0.9, "grad_norm": 0.7150408067804788, "learning_rate": 6.018729164946112e-06, "loss": 0.5964, "step": 7090 }, { "epoch": 0.9, "grad_norm": 0.8315369821989891, "learning_rate": 6.017719195798595e-06, "loss": 0.6406, "step": 7091 }, { "epoch": 0.9, "grad_norm": 0.699505475163175, "learning_rate": 6.016709183330023e-06, "loss": 0.5979, "step": 7092 }, { "epoch": 0.9, "grad_norm": 0.6470538483120143, "learning_rate": 6.0156991275833895e-06, "loss": 0.5236, "step": 7093 }, { "epoch": 0.9, "grad_norm": 0.816930821266561, "learning_rate": 6.0146890286016905e-06, "loss": 0.5391, "step": 7094 }, { "epoch": 0.9, "grad_norm": 0.7199362070824941, "learning_rate": 6.013678886427921e-06, "loss": 0.5074, "step": 7095 }, { "epoch": 0.9, "grad_norm": 0.7627784379619833, "learning_rate": 6.012668701105081e-06, "loss": 0.531, "step": 7096 }, { "epoch": 0.9, "grad_norm": 1.0517068494106159, "learning_rate": 6.011658472676172e-06, "loss": 0.6331, "step": 7097 }, { "epoch": 0.9, "grad_norm": 0.7076510314830814, "learning_rate": 6.010648201184193e-06, "loss": 0.6351, "step": 7098 }, { "epoch": 0.9, "grad_norm": 0.7192164568048685, "learning_rate": 6.009637886672151e-06, "loss": 0.6225, "step": 7099 }, { "epoch": 0.9, "grad_norm": 0.6591453296503997, "learning_rate": 6.008627529183049e-06, "loss": 0.5122, "step": 7100 }, { "epoch": 0.9, "grad_norm": 0.908526565693428, "learning_rate": 6.007617128759897e-06, "loss": 0.636, "step": 7101 }, { "epoch": 0.9, "grad_norm": 0.6526237649533455, "learning_rate": 6.006606685445703e-06, "loss": 0.5644, "step": 7102 }, { "epoch": 0.9, "grad_norm": 0.5511515448112384, "learning_rate": 6.005596199283479e-06, "loss": 0.4747, "step": 7103 }, { "epoch": 0.91, "grad_norm": 0.6527267519955694, "learning_rate": 6.004585670316239e-06, "loss": 0.5206, "step": 7104 }, { "epoch": 0.91, "grad_norm": 0.7818144627686768, "learning_rate": 6.003575098586997e-06, "loss": 0.6209, "step": 7105 }, { "epoch": 0.91, "grad_norm": 0.6316059920579868, "learning_rate": 6.00256448413877e-06, "loss": 0.5712, "step": 7106 }, { "epoch": 0.91, "grad_norm": 0.5420774402223564, "learning_rate": 6.001553827014577e-06, "loss": 0.4941, "step": 7107 }, { "epoch": 0.91, "grad_norm": 0.6586518366308904, "learning_rate": 6.000543127257438e-06, "loss": 0.5324, "step": 7108 }, { "epoch": 0.91, "grad_norm": 0.7115921284145044, "learning_rate": 5.999532384910374e-06, "loss": 0.5458, "step": 7109 }, { "epoch": 0.91, "grad_norm": 0.7526653039661527, "learning_rate": 5.998521600016411e-06, "loss": 0.5702, "step": 7110 }, { "epoch": 0.91, "grad_norm": 0.8628833770275489, "learning_rate": 5.997510772618576e-06, "loss": 0.6212, "step": 7111 }, { "epoch": 0.91, "grad_norm": 0.7805227832120558, "learning_rate": 5.9964999027598935e-06, "loss": 0.5973, "step": 7112 }, { "epoch": 0.91, "grad_norm": 0.5602003628481105, "learning_rate": 5.995488990483395e-06, "loss": 0.5268, "step": 7113 }, { "epoch": 0.91, "grad_norm": 0.6177402675696717, "learning_rate": 5.994478035832111e-06, "loss": 0.5986, "step": 7114 }, { "epoch": 0.91, "grad_norm": 0.5909271036335947, "learning_rate": 5.993467038849075e-06, "loss": 0.5722, "step": 7115 }, { "epoch": 0.91, "grad_norm": 0.8137966181961045, "learning_rate": 5.9924559995773215e-06, "loss": 0.6335, "step": 7116 }, { "epoch": 0.91, "grad_norm": 0.8055655690688112, "learning_rate": 5.991444918059887e-06, "loss": 0.6479, "step": 7117 }, { "epoch": 0.91, "grad_norm": 0.7368075426446393, "learning_rate": 5.990433794339812e-06, "loss": 0.5828, "step": 7118 }, { "epoch": 0.91, "grad_norm": 0.6376893108332394, "learning_rate": 5.9894226284601355e-06, "loss": 0.5768, "step": 7119 }, { "epoch": 0.91, "grad_norm": 0.7633560778863631, "learning_rate": 5.988411420463898e-06, "loss": 0.6285, "step": 7120 }, { "epoch": 0.91, "grad_norm": 0.5656994053803676, "learning_rate": 5.987400170394146e-06, "loss": 0.5029, "step": 7121 }, { "epoch": 0.91, "grad_norm": 0.6560430090900162, "learning_rate": 5.986388878293923e-06, "loss": 0.5222, "step": 7122 }, { "epoch": 0.91, "grad_norm": 0.7305105623059527, "learning_rate": 5.985377544206278e-06, "loss": 0.5946, "step": 7123 }, { "epoch": 0.91, "grad_norm": 0.7100536238867052, "learning_rate": 5.98436616817426e-06, "loss": 0.5535, "step": 7124 }, { "epoch": 0.91, "grad_norm": 1.3688451111270026, "learning_rate": 5.98335475024092e-06, "loss": 0.5401, "step": 7125 }, { "epoch": 0.91, "grad_norm": 0.606096566139521, "learning_rate": 5.982343290449311e-06, "loss": 0.5654, "step": 7126 }, { "epoch": 0.91, "grad_norm": 0.9467145391972902, "learning_rate": 5.981331788842485e-06, "loss": 0.5792, "step": 7127 }, { "epoch": 0.91, "grad_norm": 0.8465251048556427, "learning_rate": 5.980320245463502e-06, "loss": 0.6222, "step": 7128 }, { "epoch": 0.91, "grad_norm": 0.5689033131252232, "learning_rate": 5.979308660355419e-06, "loss": 0.4894, "step": 7129 }, { "epoch": 0.91, "grad_norm": 0.5929282360825587, "learning_rate": 5.978297033561295e-06, "loss": 0.5315, "step": 7130 }, { "epoch": 0.91, "grad_norm": 1.2609677645202362, "learning_rate": 5.977285365124195e-06, "loss": 0.6, "step": 7131 }, { "epoch": 0.91, "grad_norm": 0.6110118892147859, "learning_rate": 5.976273655087178e-06, "loss": 0.4886, "step": 7132 }, { "epoch": 0.91, "grad_norm": 0.7791127968859274, "learning_rate": 5.97526190349331e-06, "loss": 0.619, "step": 7133 }, { "epoch": 0.91, "grad_norm": 0.5583853862987631, "learning_rate": 5.974250110385661e-06, "loss": 0.5145, "step": 7134 }, { "epoch": 0.91, "grad_norm": 0.6382480622421793, "learning_rate": 5.9732382758072985e-06, "loss": 0.5536, "step": 7135 }, { "epoch": 0.91, "grad_norm": 0.824847088257256, "learning_rate": 5.9722263998012905e-06, "loss": 0.6172, "step": 7136 }, { "epoch": 0.91, "grad_norm": 0.7809217260638395, "learning_rate": 5.971214482410713e-06, "loss": 0.619, "step": 7137 }, { "epoch": 0.91, "grad_norm": 0.9115552827998088, "learning_rate": 5.9702025236786385e-06, "loss": 0.6926, "step": 7138 }, { "epoch": 0.91, "grad_norm": 0.8096508120858464, "learning_rate": 5.969190523648143e-06, "loss": 0.5813, "step": 7139 }, { "epoch": 0.91, "grad_norm": 0.5385794462804069, "learning_rate": 5.9681784823623035e-06, "loss": 0.4845, "step": 7140 }, { "epoch": 0.91, "grad_norm": 0.5701950137838097, "learning_rate": 5.967166399864199e-06, "loss": 0.5091, "step": 7141 }, { "epoch": 0.91, "grad_norm": 0.6510968743689732, "learning_rate": 5.9661542761969134e-06, "loss": 0.5597, "step": 7142 }, { "epoch": 0.91, "grad_norm": 0.6148001912811577, "learning_rate": 5.965142111403527e-06, "loss": 0.5745, "step": 7143 }, { "epoch": 0.91, "grad_norm": 0.6375709545722904, "learning_rate": 5.964129905527125e-06, "loss": 0.5938, "step": 7144 }, { "epoch": 0.91, "grad_norm": 0.8125509007726149, "learning_rate": 5.963117658610794e-06, "loss": 0.5854, "step": 7145 }, { "epoch": 0.91, "grad_norm": 0.616028726265624, "learning_rate": 5.96210537069762e-06, "loss": 0.4918, "step": 7146 }, { "epoch": 0.91, "grad_norm": 0.6498585460254964, "learning_rate": 5.961093041830698e-06, "loss": 0.5581, "step": 7147 }, { "epoch": 0.91, "grad_norm": 0.739770037097316, "learning_rate": 5.960080672053115e-06, "loss": 0.5302, "step": 7148 }, { "epoch": 0.91, "grad_norm": 0.8914153248838461, "learning_rate": 5.959068261407965e-06, "loss": 0.6369, "step": 7149 }, { "epoch": 0.91, "grad_norm": 0.7522871068945128, "learning_rate": 5.958055809938345e-06, "loss": 0.5893, "step": 7150 }, { "epoch": 0.91, "grad_norm": 0.8661673027769349, "learning_rate": 5.95704331768735e-06, "loss": 0.6221, "step": 7151 }, { "epoch": 0.91, "grad_norm": 0.8137186745809454, "learning_rate": 5.956030784698081e-06, "loss": 0.6102, "step": 7152 }, { "epoch": 0.91, "grad_norm": 0.6389780260807015, "learning_rate": 5.9550182110136345e-06, "loss": 0.4756, "step": 7153 }, { "epoch": 0.91, "grad_norm": 0.9007838809121117, "learning_rate": 5.954005596677115e-06, "loss": 0.6118, "step": 7154 }, { "epoch": 0.91, "grad_norm": 0.8722979637220367, "learning_rate": 5.952992941731626e-06, "loss": 0.6217, "step": 7155 }, { "epoch": 0.91, "grad_norm": 0.6290544796256371, "learning_rate": 5.951980246220272e-06, "loss": 0.5266, "step": 7156 }, { "epoch": 0.91, "grad_norm": 0.7740339630176876, "learning_rate": 5.9509675101861604e-06, "loss": 0.6114, "step": 7157 }, { "epoch": 0.91, "grad_norm": 0.7094801167297725, "learning_rate": 5.9499547336724025e-06, "loss": 0.5376, "step": 7158 }, { "epoch": 0.91, "grad_norm": 0.670911401387463, "learning_rate": 5.948941916722107e-06, "loss": 0.5188, "step": 7159 }, { "epoch": 0.91, "grad_norm": 0.6805473534722848, "learning_rate": 5.9479290593783865e-06, "loss": 0.5408, "step": 7160 }, { "epoch": 0.91, "grad_norm": 0.5415560859130355, "learning_rate": 5.9469161616843554e-06, "loss": 0.5114, "step": 7161 }, { "epoch": 0.91, "grad_norm": 0.6999988889602705, "learning_rate": 5.945903223683128e-06, "loss": 0.5516, "step": 7162 }, { "epoch": 0.91, "grad_norm": 1.5969446972468377, "learning_rate": 5.944890245417825e-06, "loss": 0.5858, "step": 7163 }, { "epoch": 0.91, "grad_norm": 0.780556428227834, "learning_rate": 5.94387722693156e-06, "loss": 0.6036, "step": 7164 }, { "epoch": 0.91, "grad_norm": 0.5729227086510631, "learning_rate": 5.942864168267461e-06, "loss": 0.5475, "step": 7165 }, { "epoch": 0.91, "grad_norm": 0.5526932724807357, "learning_rate": 5.941851069468646e-06, "loss": 0.53, "step": 7166 }, { "epoch": 0.91, "grad_norm": 0.6565265821662218, "learning_rate": 5.9408379305782415e-06, "loss": 0.5202, "step": 7167 }, { "epoch": 0.91, "grad_norm": 0.6865929034117233, "learning_rate": 5.939824751639373e-06, "loss": 0.5723, "step": 7168 }, { "epoch": 0.91, "grad_norm": 0.7925018445090999, "learning_rate": 5.938811532695166e-06, "loss": 0.5967, "step": 7169 }, { "epoch": 0.91, "grad_norm": 0.7102343774352992, "learning_rate": 5.937798273788754e-06, "loss": 0.5892, "step": 7170 }, { "epoch": 0.91, "grad_norm": 0.8779940712735889, "learning_rate": 5.936784974963266e-06, "loss": 0.5544, "step": 7171 }, { "epoch": 0.91, "grad_norm": 0.6772792441690239, "learning_rate": 5.935771636261835e-06, "loss": 0.5276, "step": 7172 }, { "epoch": 0.91, "grad_norm": 1.043407671374952, "learning_rate": 5.934758257727595e-06, "loss": 0.5983, "step": 7173 }, { "epoch": 0.91, "grad_norm": 0.5830818131182096, "learning_rate": 5.933744839403683e-06, "loss": 0.4846, "step": 7174 }, { "epoch": 0.91, "grad_norm": 0.8823314896482773, "learning_rate": 5.932731381333239e-06, "loss": 0.646, "step": 7175 }, { "epoch": 0.91, "grad_norm": 0.6289780052938609, "learning_rate": 5.931717883559399e-06, "loss": 0.52, "step": 7176 }, { "epoch": 0.91, "grad_norm": 0.5476421579791547, "learning_rate": 5.930704346125306e-06, "loss": 0.5383, "step": 7177 }, { "epoch": 0.91, "grad_norm": 0.635112023277828, "learning_rate": 5.929690769074103e-06, "loss": 0.5503, "step": 7178 }, { "epoch": 0.91, "grad_norm": 0.8178462896208889, "learning_rate": 5.928677152448935e-06, "loss": 0.6054, "step": 7179 }, { "epoch": 0.91, "grad_norm": 0.7534776143426254, "learning_rate": 5.9276634962929495e-06, "loss": 0.6388, "step": 7180 }, { "epoch": 0.91, "grad_norm": 0.7323572319169261, "learning_rate": 5.926649800649293e-06, "loss": 0.5689, "step": 7181 }, { "epoch": 0.91, "grad_norm": 2.9494818224270936, "learning_rate": 5.925636065561113e-06, "loss": 0.5651, "step": 7182 }, { "epoch": 0.92, "grad_norm": 0.7334370926046292, "learning_rate": 5.9246222910715655e-06, "loss": 0.5804, "step": 7183 }, { "epoch": 0.92, "grad_norm": 1.1804973622481583, "learning_rate": 5.923608477223803e-06, "loss": 0.593, "step": 7184 }, { "epoch": 0.92, "grad_norm": 0.5964010756064292, "learning_rate": 5.922594624060978e-06, "loss": 0.5087, "step": 7185 }, { "epoch": 0.92, "grad_norm": 0.753845475829064, "learning_rate": 5.921580731626248e-06, "loss": 0.5902, "step": 7186 }, { "epoch": 0.92, "grad_norm": 0.6525589041353362, "learning_rate": 5.92056679996277e-06, "loss": 0.5023, "step": 7187 }, { "epoch": 0.92, "grad_norm": 0.6700678120394333, "learning_rate": 5.919552829113707e-06, "loss": 0.5163, "step": 7188 }, { "epoch": 0.92, "grad_norm": 0.7170149945197423, "learning_rate": 5.918538819122217e-06, "loss": 0.5363, "step": 7189 }, { "epoch": 0.92, "grad_norm": 0.8488018843512644, "learning_rate": 5.917524770031465e-06, "loss": 0.592, "step": 7190 }, { "epoch": 0.92, "grad_norm": 0.6533620222904847, "learning_rate": 5.916510681884616e-06, "loss": 0.5723, "step": 7191 }, { "epoch": 0.92, "grad_norm": 0.6408270049772504, "learning_rate": 5.915496554724837e-06, "loss": 0.4436, "step": 7192 }, { "epoch": 0.92, "grad_norm": 0.6786165644820878, "learning_rate": 5.914482388595294e-06, "loss": 0.5396, "step": 7193 }, { "epoch": 0.92, "grad_norm": 0.6308099791159926, "learning_rate": 5.913468183539158e-06, "loss": 0.5553, "step": 7194 }, { "epoch": 0.92, "grad_norm": 0.5258517317425928, "learning_rate": 5.9124539395996e-06, "loss": 0.5148, "step": 7195 }, { "epoch": 0.92, "grad_norm": 0.6638753437974643, "learning_rate": 5.911439656819794e-06, "loss": 0.5294, "step": 7196 }, { "epoch": 0.92, "grad_norm": 0.5745802024832648, "learning_rate": 5.910425335242914e-06, "loss": 0.5142, "step": 7197 }, { "epoch": 0.92, "grad_norm": 0.7218634303445264, "learning_rate": 5.9094109749121375e-06, "loss": 0.592, "step": 7198 }, { "epoch": 0.92, "grad_norm": 0.8275995525581679, "learning_rate": 5.9083965758706415e-06, "loss": 0.6407, "step": 7199 }, { "epoch": 0.92, "grad_norm": 0.7125587368919056, "learning_rate": 5.907382138161607e-06, "loss": 0.6005, "step": 7200 }, { "epoch": 0.92, "grad_norm": 0.683047625340476, "learning_rate": 5.906367661828214e-06, "loss": 0.5889, "step": 7201 }, { "epoch": 0.92, "grad_norm": 0.8188590980388989, "learning_rate": 5.905353146913645e-06, "loss": 0.6049, "step": 7202 }, { "epoch": 0.92, "grad_norm": 0.5829997877623249, "learning_rate": 5.904338593461087e-06, "loss": 0.544, "step": 7203 }, { "epoch": 0.92, "grad_norm": 1.0746563096274913, "learning_rate": 5.903324001513724e-06, "loss": 0.5755, "step": 7204 }, { "epoch": 0.92, "grad_norm": 0.6011755085581385, "learning_rate": 5.902309371114745e-06, "loss": 0.5377, "step": 7205 }, { "epoch": 0.92, "grad_norm": 0.6765894960483733, "learning_rate": 5.901294702307339e-06, "loss": 0.5617, "step": 7206 }, { "epoch": 0.92, "grad_norm": 0.6419749281686844, "learning_rate": 5.900279995134699e-06, "loss": 0.5236, "step": 7207 }, { "epoch": 0.92, "grad_norm": 0.7260466237244242, "learning_rate": 5.899265249640014e-06, "loss": 0.5627, "step": 7208 }, { "epoch": 0.92, "grad_norm": 0.6804589382568738, "learning_rate": 5.898250465866483e-06, "loss": 0.6041, "step": 7209 }, { "epoch": 0.92, "grad_norm": 0.7260330565911145, "learning_rate": 5.8972356438573e-06, "loss": 0.6468, "step": 7210 }, { "epoch": 0.92, "grad_norm": 0.8120694239781333, "learning_rate": 5.896220783655663e-06, "loss": 0.6234, "step": 7211 }, { "epoch": 0.92, "grad_norm": 0.7537635229403995, "learning_rate": 5.89520588530477e-06, "loss": 0.6105, "step": 7212 }, { "epoch": 0.92, "grad_norm": 0.631604668002357, "learning_rate": 5.894190948847824e-06, "loss": 0.5171, "step": 7213 }, { "epoch": 0.92, "grad_norm": 0.613916991971185, "learning_rate": 5.893175974328027e-06, "loss": 0.5376, "step": 7214 }, { "epoch": 0.92, "grad_norm": 0.6614329216719724, "learning_rate": 5.892160961788582e-06, "loss": 0.5349, "step": 7215 }, { "epoch": 0.92, "grad_norm": 0.6898093405177034, "learning_rate": 5.891145911272697e-06, "loss": 0.5927, "step": 7216 }, { "epoch": 0.92, "grad_norm": 1.2641626101636871, "learning_rate": 5.890130822823578e-06, "loss": 0.6472, "step": 7217 }, { "epoch": 0.92, "grad_norm": 0.5193522151683049, "learning_rate": 5.889115696484433e-06, "loss": 0.4806, "step": 7218 }, { "epoch": 0.92, "grad_norm": 0.693461264309939, "learning_rate": 5.888100532298474e-06, "loss": 0.5587, "step": 7219 }, { "epoch": 0.92, "grad_norm": 0.775924265788248, "learning_rate": 5.8870853303089145e-06, "loss": 0.6299, "step": 7220 }, { "epoch": 0.92, "grad_norm": 0.7564253579679279, "learning_rate": 5.886070090558967e-06, "loss": 0.6224, "step": 7221 }, { "epoch": 0.92, "grad_norm": 0.7511805664848867, "learning_rate": 5.885054813091847e-06, "loss": 0.545, "step": 7222 }, { "epoch": 0.92, "grad_norm": 0.7532209394603636, "learning_rate": 5.884039497950773e-06, "loss": 0.6317, "step": 7223 }, { "epoch": 0.92, "grad_norm": 0.6535111507141641, "learning_rate": 5.883024145178961e-06, "loss": 0.5001, "step": 7224 }, { "epoch": 0.92, "grad_norm": 0.6102915383448848, "learning_rate": 5.882008754819634e-06, "loss": 0.5686, "step": 7225 }, { "epoch": 0.92, "grad_norm": 0.8643434331530937, "learning_rate": 5.880993326916012e-06, "loss": 0.6236, "step": 7226 }, { "epoch": 0.92, "grad_norm": 0.7603682620802733, "learning_rate": 5.879977861511319e-06, "loss": 0.5728, "step": 7227 }, { "epoch": 0.92, "grad_norm": 0.7440511129750577, "learning_rate": 5.878962358648781e-06, "loss": 0.5748, "step": 7228 }, { "epoch": 0.92, "grad_norm": 0.7453367244684165, "learning_rate": 5.877946818371624e-06, "loss": 0.593, "step": 7229 }, { "epoch": 0.92, "grad_norm": 0.7142201825666366, "learning_rate": 5.876931240723076e-06, "loss": 0.5748, "step": 7230 }, { "epoch": 0.92, "grad_norm": 0.6348020574814905, "learning_rate": 5.875915625746369e-06, "loss": 0.4652, "step": 7231 }, { "epoch": 0.92, "grad_norm": 0.5732572572783668, "learning_rate": 5.874899973484731e-06, "loss": 0.5243, "step": 7232 }, { "epoch": 0.92, "grad_norm": 0.8021992513079979, "learning_rate": 5.8738842839813966e-06, "loss": 0.5826, "step": 7233 }, { "epoch": 0.92, "grad_norm": 0.5565199083502078, "learning_rate": 5.8728685572796025e-06, "loss": 0.4745, "step": 7234 }, { "epoch": 0.92, "grad_norm": 1.0035041655435828, "learning_rate": 5.871852793422582e-06, "loss": 0.6218, "step": 7235 }, { "epoch": 0.92, "grad_norm": 0.564083561414303, "learning_rate": 5.870836992453576e-06, "loss": 0.5022, "step": 7236 }, { "epoch": 0.92, "grad_norm": 0.8312163668841487, "learning_rate": 5.86982115441582e-06, "loss": 0.5982, "step": 7237 }, { "epoch": 0.92, "grad_norm": 0.556786251097832, "learning_rate": 5.868805279352559e-06, "loss": 0.5339, "step": 7238 }, { "epoch": 0.92, "grad_norm": 0.7339868029490866, "learning_rate": 5.867789367307031e-06, "loss": 0.6447, "step": 7239 }, { "epoch": 0.92, "grad_norm": 0.7161191679438755, "learning_rate": 5.8667734183224835e-06, "loss": 0.5506, "step": 7240 }, { "epoch": 0.92, "grad_norm": 0.7817509442462736, "learning_rate": 5.865757432442162e-06, "loss": 0.6232, "step": 7241 }, { "epoch": 0.92, "grad_norm": 0.8237965183738162, "learning_rate": 5.864741409709313e-06, "loss": 0.6474, "step": 7242 }, { "epoch": 0.92, "grad_norm": 0.7999948748300804, "learning_rate": 5.863725350167185e-06, "loss": 0.6375, "step": 7243 }, { "epoch": 0.92, "grad_norm": 0.6992666595332188, "learning_rate": 5.8627092538590305e-06, "loss": 0.6001, "step": 7244 }, { "epoch": 0.92, "grad_norm": 0.6423170497339399, "learning_rate": 5.861693120828097e-06, "loss": 0.5058, "step": 7245 }, { "epoch": 0.92, "grad_norm": 0.8022773476160895, "learning_rate": 5.860676951117643e-06, "loss": 0.587, "step": 7246 }, { "epoch": 0.92, "grad_norm": 0.7104105558828214, "learning_rate": 5.859660744770922e-06, "loss": 0.527, "step": 7247 }, { "epoch": 0.92, "grad_norm": 0.7307800551745314, "learning_rate": 5.858644501831189e-06, "loss": 0.5882, "step": 7248 }, { "epoch": 0.92, "grad_norm": 0.8297109238648962, "learning_rate": 5.857628222341705e-06, "loss": 0.5899, "step": 7249 }, { "epoch": 0.92, "grad_norm": 0.6407306982382849, "learning_rate": 5.856611906345726e-06, "loss": 0.5241, "step": 7250 }, { "epoch": 0.92, "grad_norm": 0.5248318932797091, "learning_rate": 5.855595553886516e-06, "loss": 0.4604, "step": 7251 }, { "epoch": 0.92, "grad_norm": 0.7399990124653967, "learning_rate": 5.854579165007338e-06, "loss": 0.4952, "step": 7252 }, { "epoch": 0.92, "grad_norm": 0.7943497501869831, "learning_rate": 5.853562739751455e-06, "loss": 0.6566, "step": 7253 }, { "epoch": 0.92, "grad_norm": 0.6319138225181162, "learning_rate": 5.852546278162135e-06, "loss": 0.5243, "step": 7254 }, { "epoch": 0.92, "grad_norm": 0.8630142886579614, "learning_rate": 5.851529780282643e-06, "loss": 0.5679, "step": 7255 }, { "epoch": 0.92, "grad_norm": 0.648310338628721, "learning_rate": 5.850513246156251e-06, "loss": 0.5886, "step": 7256 }, { "epoch": 0.92, "grad_norm": 0.5592694301524513, "learning_rate": 5.849496675826226e-06, "loss": 0.5162, "step": 7257 }, { "epoch": 0.92, "grad_norm": 0.6810846159214108, "learning_rate": 5.848480069335843e-06, "loss": 0.5328, "step": 7258 }, { "epoch": 0.92, "grad_norm": 0.892395292191634, "learning_rate": 5.847463426728375e-06, "loss": 0.6174, "step": 7259 }, { "epoch": 0.92, "grad_norm": 0.825818034903631, "learning_rate": 5.846446748047098e-06, "loss": 0.6505, "step": 7260 }, { "epoch": 0.93, "grad_norm": 0.7165267176836811, "learning_rate": 5.845430033335286e-06, "loss": 0.5006, "step": 7261 }, { "epoch": 0.93, "grad_norm": 0.6705033128727618, "learning_rate": 5.84441328263622e-06, "loss": 0.5677, "step": 7262 }, { "epoch": 0.93, "grad_norm": 0.6894400756286736, "learning_rate": 5.843396495993179e-06, "loss": 0.4957, "step": 7263 }, { "epoch": 0.93, "grad_norm": 0.7164755467312727, "learning_rate": 5.842379673449443e-06, "loss": 0.6148, "step": 7264 }, { "epoch": 0.93, "grad_norm": 0.6902510171666226, "learning_rate": 5.841362815048297e-06, "loss": 0.5347, "step": 7265 }, { "epoch": 0.93, "grad_norm": 0.6157792280727161, "learning_rate": 5.840345920833025e-06, "loss": 0.5318, "step": 7266 }, { "epoch": 0.93, "grad_norm": 0.6523571112720159, "learning_rate": 5.839328990846913e-06, "loss": 0.5343, "step": 7267 }, { "epoch": 0.93, "grad_norm": 0.6491774948464895, "learning_rate": 5.838312025133247e-06, "loss": 0.5205, "step": 7268 }, { "epoch": 0.93, "grad_norm": 0.6413892153133484, "learning_rate": 5.837295023735318e-06, "loss": 0.5371, "step": 7269 }, { "epoch": 0.93, "grad_norm": 0.5681557721160956, "learning_rate": 5.836277986696413e-06, "loss": 0.5094, "step": 7270 }, { "epoch": 0.93, "grad_norm": 0.7352275222706279, "learning_rate": 5.835260914059828e-06, "loss": 0.5759, "step": 7271 }, { "epoch": 0.93, "grad_norm": 0.6516485592214992, "learning_rate": 5.8342438058688554e-06, "loss": 0.5103, "step": 7272 }, { "epoch": 0.93, "grad_norm": 0.8861948647087583, "learning_rate": 5.83322666216679e-06, "loss": 0.5509, "step": 7273 }, { "epoch": 0.93, "grad_norm": 1.1264271629169584, "learning_rate": 5.832209482996927e-06, "loss": 0.6502, "step": 7274 }, { "epoch": 0.93, "grad_norm": 0.6007806751201401, "learning_rate": 5.8311922684025665e-06, "loss": 0.5171, "step": 7275 }, { "epoch": 0.93, "grad_norm": 0.5815624149811355, "learning_rate": 5.830175018427007e-06, "loss": 0.5103, "step": 7276 }, { "epoch": 0.93, "grad_norm": 0.746432587254956, "learning_rate": 5.829157733113551e-06, "loss": 0.6755, "step": 7277 }, { "epoch": 0.93, "grad_norm": 0.5971462350187131, "learning_rate": 5.828140412505499e-06, "loss": 0.4912, "step": 7278 }, { "epoch": 0.93, "grad_norm": 0.6938170524870781, "learning_rate": 5.827123056646156e-06, "loss": 0.5943, "step": 7279 }, { "epoch": 0.93, "grad_norm": 0.6160802896861488, "learning_rate": 5.826105665578827e-06, "loss": 0.4987, "step": 7280 }, { "epoch": 0.93, "grad_norm": 0.6498898330890953, "learning_rate": 5.82508823934682e-06, "loss": 0.5708, "step": 7281 }, { "epoch": 0.93, "grad_norm": 0.7738949323289743, "learning_rate": 5.8240707779934435e-06, "loss": 0.5498, "step": 7282 }, { "epoch": 0.93, "grad_norm": 0.7765301423890085, "learning_rate": 5.823053281562008e-06, "loss": 0.623, "step": 7283 }, { "epoch": 0.93, "grad_norm": 0.7132515388069783, "learning_rate": 5.822035750095824e-06, "loss": 0.6325, "step": 7284 }, { "epoch": 0.93, "grad_norm": 0.7143228425316213, "learning_rate": 5.821018183638204e-06, "loss": 0.5357, "step": 7285 }, { "epoch": 0.93, "grad_norm": 0.5636076813919914, "learning_rate": 5.820000582232465e-06, "loss": 0.4801, "step": 7286 }, { "epoch": 0.93, "grad_norm": 0.794985452295735, "learning_rate": 5.818982945921921e-06, "loss": 0.5896, "step": 7287 }, { "epoch": 0.93, "grad_norm": 0.6115182909504777, "learning_rate": 5.8179652747498885e-06, "loss": 0.4978, "step": 7288 }, { "epoch": 0.93, "grad_norm": 0.7767621973329313, "learning_rate": 5.8169475687596885e-06, "loss": 0.6628, "step": 7289 }, { "epoch": 0.93, "grad_norm": 0.6632843083244028, "learning_rate": 5.8159298279946415e-06, "loss": 0.5125, "step": 7290 }, { "epoch": 0.93, "grad_norm": 0.5658385117973802, "learning_rate": 5.814912052498071e-06, "loss": 0.4695, "step": 7291 }, { "epoch": 0.93, "grad_norm": 0.5294647688395007, "learning_rate": 5.813894242313297e-06, "loss": 0.5064, "step": 7292 }, { "epoch": 0.93, "grad_norm": 0.5538328673703952, "learning_rate": 5.812876397483645e-06, "loss": 0.4584, "step": 7293 }, { "epoch": 0.93, "grad_norm": 0.6773857563583625, "learning_rate": 5.811858518052445e-06, "loss": 0.5655, "step": 7294 }, { "epoch": 0.93, "grad_norm": 0.8299489916019177, "learning_rate": 5.810840604063019e-06, "loss": 0.5688, "step": 7295 }, { "epoch": 0.93, "grad_norm": 0.8512504343072516, "learning_rate": 5.809822655558701e-06, "loss": 0.6078, "step": 7296 }, { "epoch": 0.93, "grad_norm": 0.6345573429900008, "learning_rate": 5.808804672582821e-06, "loss": 0.5575, "step": 7297 }, { "epoch": 0.93, "grad_norm": 0.5266777328024713, "learning_rate": 5.80778665517871e-06, "loss": 0.4691, "step": 7298 }, { "epoch": 0.93, "grad_norm": 0.7904841558684887, "learning_rate": 5.806768603389703e-06, "loss": 0.6036, "step": 7299 }, { "epoch": 0.93, "grad_norm": 0.610487168331742, "learning_rate": 5.805750517259134e-06, "loss": 0.5656, "step": 7300 }, { "epoch": 0.93, "grad_norm": 0.777017542457991, "learning_rate": 5.8047323968303395e-06, "loss": 0.571, "step": 7301 }, { "epoch": 0.93, "grad_norm": 0.5665595055161324, "learning_rate": 5.803714242146659e-06, "loss": 0.464, "step": 7302 }, { "epoch": 0.93, "grad_norm": 0.6278771042895364, "learning_rate": 5.802696053251432e-06, "loss": 0.555, "step": 7303 }, { "epoch": 0.93, "grad_norm": 0.6927763676822408, "learning_rate": 5.801677830187999e-06, "loss": 0.6089, "step": 7304 }, { "epoch": 0.93, "grad_norm": 0.6001409500907885, "learning_rate": 5.800659572999703e-06, "loss": 0.5146, "step": 7305 }, { "epoch": 0.93, "grad_norm": 0.6843889087530002, "learning_rate": 5.799641281729887e-06, "loss": 0.5852, "step": 7306 }, { "epoch": 0.93, "grad_norm": 0.6261159346037729, "learning_rate": 5.798622956421897e-06, "loss": 0.5592, "step": 7307 }, { "epoch": 0.93, "grad_norm": 0.710718794672907, "learning_rate": 5.79760459711908e-06, "loss": 0.6004, "step": 7308 }, { "epoch": 0.93, "grad_norm": 0.5953656751254692, "learning_rate": 5.796586203864784e-06, "loss": 0.5419, "step": 7309 }, { "epoch": 0.93, "grad_norm": 0.5661716398805003, "learning_rate": 5.795567776702358e-06, "loss": 0.5404, "step": 7310 }, { "epoch": 0.93, "grad_norm": 0.7715856442709891, "learning_rate": 5.794549315675155e-06, "loss": 0.6305, "step": 7311 }, { "epoch": 0.93, "grad_norm": 0.6470501375160036, "learning_rate": 5.793530820826526e-06, "loss": 0.5366, "step": 7312 }, { "epoch": 0.93, "grad_norm": 0.6474857512999157, "learning_rate": 5.792512292199825e-06, "loss": 0.5427, "step": 7313 }, { "epoch": 0.93, "grad_norm": 0.558137666083832, "learning_rate": 5.79149372983841e-06, "loss": 0.4815, "step": 7314 }, { "epoch": 0.93, "grad_norm": 0.6000185161865772, "learning_rate": 5.790475133785636e-06, "loss": 0.5203, "step": 7315 }, { "epoch": 0.93, "grad_norm": 0.8911390778156995, "learning_rate": 5.789456504084861e-06, "loss": 0.543, "step": 7316 }, { "epoch": 0.93, "grad_norm": 0.7606267790585807, "learning_rate": 5.788437840779445e-06, "loss": 0.5925, "step": 7317 }, { "epoch": 0.93, "grad_norm": 0.7153722157555635, "learning_rate": 5.78741914391275e-06, "loss": 0.6094, "step": 7318 }, { "epoch": 0.93, "grad_norm": 0.8110151344299823, "learning_rate": 5.786400413528137e-06, "loss": 0.5681, "step": 7319 }, { "epoch": 0.93, "grad_norm": 0.9068505354730736, "learning_rate": 5.785381649668973e-06, "loss": 0.6366, "step": 7320 }, { "epoch": 0.93, "grad_norm": 0.7482058628075415, "learning_rate": 5.7843628523786224e-06, "loss": 0.6049, "step": 7321 }, { "epoch": 0.93, "grad_norm": 0.7868115218222387, "learning_rate": 5.78334402170045e-06, "loss": 0.7053, "step": 7322 }, { "epoch": 0.93, "grad_norm": 0.5478806940317313, "learning_rate": 5.782325157677827e-06, "loss": 0.5363, "step": 7323 }, { "epoch": 0.93, "grad_norm": 0.6553736676047508, "learning_rate": 5.781306260354121e-06, "loss": 0.4501, "step": 7324 }, { "epoch": 0.93, "grad_norm": 0.6139132501769308, "learning_rate": 5.780287329772705e-06, "loss": 0.5402, "step": 7325 }, { "epoch": 0.93, "grad_norm": 0.6355656960870817, "learning_rate": 5.77926836597695e-06, "loss": 0.4826, "step": 7326 }, { "epoch": 0.93, "grad_norm": 0.7906631153158185, "learning_rate": 5.778249369010231e-06, "loss": 0.6307, "step": 7327 }, { "epoch": 0.93, "grad_norm": 0.6654108367236921, "learning_rate": 5.777230338915925e-06, "loss": 0.5398, "step": 7328 }, { "epoch": 0.93, "grad_norm": 0.7802883108848935, "learning_rate": 5.776211275737404e-06, "loss": 0.6146, "step": 7329 }, { "epoch": 0.93, "grad_norm": 0.8730210217566167, "learning_rate": 5.775192179518052e-06, "loss": 0.6861, "step": 7330 }, { "epoch": 0.93, "grad_norm": 0.6642755256253636, "learning_rate": 5.774173050301246e-06, "loss": 0.5598, "step": 7331 }, { "epoch": 0.93, "grad_norm": 0.6828931354082131, "learning_rate": 5.773153888130365e-06, "loss": 0.5477, "step": 7332 }, { "epoch": 0.93, "grad_norm": 0.8504980540566135, "learning_rate": 5.772134693048796e-06, "loss": 0.6737, "step": 7333 }, { "epoch": 0.93, "grad_norm": 0.8626703537939912, "learning_rate": 5.771115465099919e-06, "loss": 0.6658, "step": 7334 }, { "epoch": 0.93, "grad_norm": 0.7064141432567075, "learning_rate": 5.770096204327121e-06, "loss": 0.5683, "step": 7335 }, { "epoch": 0.93, "grad_norm": 0.7370490035885552, "learning_rate": 5.76907691077379e-06, "loss": 0.5936, "step": 7336 }, { "epoch": 0.93, "grad_norm": 0.7928807843531035, "learning_rate": 5.768057584483311e-06, "loss": 0.6099, "step": 7337 }, { "epoch": 0.93, "grad_norm": 0.5556098517357636, "learning_rate": 5.767038225499075e-06, "loss": 0.4458, "step": 7338 }, { "epoch": 0.93, "grad_norm": 0.5269786739704576, "learning_rate": 5.766018833864474e-06, "loss": 0.4881, "step": 7339 }, { "epoch": 0.94, "grad_norm": 0.6518402603283743, "learning_rate": 5.764999409622899e-06, "loss": 0.593, "step": 7340 }, { "epoch": 0.94, "grad_norm": 0.8607622627960236, "learning_rate": 5.763979952817742e-06, "loss": 0.6132, "step": 7341 }, { "epoch": 0.94, "grad_norm": 0.5550745442235996, "learning_rate": 5.762960463492402e-06, "loss": 0.4642, "step": 7342 }, { "epoch": 0.94, "grad_norm": 0.6982318183972722, "learning_rate": 5.761940941690271e-06, "loss": 0.5902, "step": 7343 }, { "epoch": 0.94, "grad_norm": 0.6883622646587318, "learning_rate": 5.76092138745475e-06, "loss": 0.517, "step": 7344 }, { "epoch": 0.94, "grad_norm": 0.6213778884618992, "learning_rate": 5.759901800829236e-06, "loss": 0.5233, "step": 7345 }, { "epoch": 0.94, "grad_norm": 0.6308561300454176, "learning_rate": 5.758882181857132e-06, "loss": 0.5012, "step": 7346 }, { "epoch": 0.94, "grad_norm": 0.6492288607943988, "learning_rate": 5.757862530581838e-06, "loss": 0.5539, "step": 7347 }, { "epoch": 0.94, "grad_norm": 0.5186728290297209, "learning_rate": 5.7568428470467585e-06, "loss": 0.5327, "step": 7348 }, { "epoch": 0.94, "grad_norm": 0.6570017285976666, "learning_rate": 5.755823131295297e-06, "loss": 0.5791, "step": 7349 }, { "epoch": 0.94, "grad_norm": 0.8145197848347757, "learning_rate": 5.7548033833708594e-06, "loss": 0.6382, "step": 7350 }, { "epoch": 0.94, "grad_norm": 0.6820084692761272, "learning_rate": 5.753783603316854e-06, "loss": 0.5583, "step": 7351 }, { "epoch": 0.94, "grad_norm": 0.6332458317116423, "learning_rate": 5.75276379117669e-06, "loss": 0.5421, "step": 7352 }, { "epoch": 0.94, "grad_norm": 0.6461586618587962, "learning_rate": 5.7517439469937775e-06, "loss": 0.5205, "step": 7353 }, { "epoch": 0.94, "grad_norm": 0.6096986912375691, "learning_rate": 5.750724070811526e-06, "loss": 0.5177, "step": 7354 }, { "epoch": 0.94, "grad_norm": 0.7751609533970542, "learning_rate": 5.74970416267335e-06, "loss": 0.6546, "step": 7355 }, { "epoch": 0.94, "grad_norm": 0.5793601989944073, "learning_rate": 5.748684222622664e-06, "loss": 0.476, "step": 7356 }, { "epoch": 0.94, "grad_norm": 0.636761912942386, "learning_rate": 5.747664250702882e-06, "loss": 0.5679, "step": 7357 }, { "epoch": 0.94, "grad_norm": 0.5912910977846981, "learning_rate": 5.746644246957423e-06, "loss": 0.4883, "step": 7358 }, { "epoch": 0.94, "grad_norm": 0.8115293024724972, "learning_rate": 5.745624211429705e-06, "loss": 0.5448, "step": 7359 }, { "epoch": 0.94, "grad_norm": 0.6331336748521982, "learning_rate": 5.744604144163146e-06, "loss": 0.5274, "step": 7360 }, { "epoch": 0.94, "grad_norm": 0.6858756021816557, "learning_rate": 5.7435840452011695e-06, "loss": 0.548, "step": 7361 }, { "epoch": 0.94, "grad_norm": 0.5814533625575896, "learning_rate": 5.742563914587195e-06, "loss": 0.4806, "step": 7362 }, { "epoch": 0.94, "grad_norm": 0.8448686031097746, "learning_rate": 5.741543752364646e-06, "loss": 0.604, "step": 7363 }, { "epoch": 0.94, "grad_norm": 0.7131375390963478, "learning_rate": 5.740523558576951e-06, "loss": 0.6207, "step": 7364 }, { "epoch": 0.94, "grad_norm": 0.6041193615922887, "learning_rate": 5.739503333267535e-06, "loss": 0.5577, "step": 7365 }, { "epoch": 0.94, "grad_norm": 0.6476494627256795, "learning_rate": 5.738483076479825e-06, "loss": 0.5687, "step": 7366 }, { "epoch": 0.94, "grad_norm": 0.5732051931699436, "learning_rate": 5.73746278825725e-06, "loss": 0.5384, "step": 7367 }, { "epoch": 0.94, "grad_norm": 0.7014535737222668, "learning_rate": 5.73644246864324e-06, "loss": 0.5415, "step": 7368 }, { "epoch": 0.94, "grad_norm": 0.890259532896133, "learning_rate": 5.735422117681228e-06, "loss": 0.6049, "step": 7369 }, { "epoch": 0.94, "grad_norm": 0.962262608558948, "learning_rate": 5.734401735414646e-06, "loss": 0.6252, "step": 7370 }, { "epoch": 0.94, "grad_norm": 0.6922243929075977, "learning_rate": 5.733381321886929e-06, "loss": 0.6154, "step": 7371 }, { "epoch": 0.94, "grad_norm": 0.8310460094501119, "learning_rate": 5.732360877141514e-06, "loss": 0.5878, "step": 7372 }, { "epoch": 0.94, "grad_norm": 0.6793760578554305, "learning_rate": 5.731340401221835e-06, "loss": 0.595, "step": 7373 }, { "epoch": 0.94, "grad_norm": 0.6752946060588411, "learning_rate": 5.730319894171335e-06, "loss": 0.5282, "step": 7374 }, { "epoch": 0.94, "grad_norm": 0.6121113321309787, "learning_rate": 5.729299356033446e-06, "loss": 0.5057, "step": 7375 }, { "epoch": 0.94, "grad_norm": 0.6289692836393576, "learning_rate": 5.728278786851618e-06, "loss": 0.5292, "step": 7376 }, { "epoch": 0.94, "grad_norm": 0.5610746626640373, "learning_rate": 5.727258186669288e-06, "loss": 0.4688, "step": 7377 }, { "epoch": 0.94, "grad_norm": 0.627680869265941, "learning_rate": 5.726237555529901e-06, "loss": 0.5053, "step": 7378 }, { "epoch": 0.94, "grad_norm": 0.6137069671782773, "learning_rate": 5.7252168934769024e-06, "loss": 0.501, "step": 7379 }, { "epoch": 0.94, "grad_norm": 0.7840440910327866, "learning_rate": 5.724196200553738e-06, "loss": 0.5845, "step": 7380 }, { "epoch": 0.94, "grad_norm": 0.7814997338324008, "learning_rate": 5.723175476803854e-06, "loss": 0.6358, "step": 7381 }, { "epoch": 0.94, "grad_norm": 0.8715585755067938, "learning_rate": 5.722154722270703e-06, "loss": 0.6125, "step": 7382 }, { "epoch": 0.94, "grad_norm": 0.6487162391170347, "learning_rate": 5.721133936997732e-06, "loss": 0.5712, "step": 7383 }, { "epoch": 0.94, "grad_norm": 0.541181427380261, "learning_rate": 5.720113121028394e-06, "loss": 0.4945, "step": 7384 }, { "epoch": 0.94, "grad_norm": 0.645279531852808, "learning_rate": 5.719092274406142e-06, "loss": 0.5564, "step": 7385 }, { "epoch": 0.94, "grad_norm": 0.8028404859534641, "learning_rate": 5.718071397174429e-06, "loss": 0.606, "step": 7386 }, { "epoch": 0.94, "grad_norm": 0.6000211867106248, "learning_rate": 5.717050489376712e-06, "loss": 0.4833, "step": 7387 }, { "epoch": 0.94, "grad_norm": 0.6481284086082985, "learning_rate": 5.7160295510564456e-06, "loss": 0.4924, "step": 7388 }, { "epoch": 0.94, "grad_norm": 0.6870893518460691, "learning_rate": 5.715008582257091e-06, "loss": 0.5259, "step": 7389 }, { "epoch": 0.94, "grad_norm": 0.5624763214807417, "learning_rate": 5.713987583022106e-06, "loss": 0.4652, "step": 7390 }, { "epoch": 0.94, "grad_norm": 0.7734190247354849, "learning_rate": 5.71296655339495e-06, "loss": 0.5363, "step": 7391 }, { "epoch": 0.94, "grad_norm": 0.6086705751030543, "learning_rate": 5.7119454934190866e-06, "loss": 0.564, "step": 7392 }, { "epoch": 0.94, "grad_norm": 0.6276722535599929, "learning_rate": 5.710924403137979e-06, "loss": 0.5594, "step": 7393 }, { "epoch": 0.94, "grad_norm": 0.7243127724305656, "learning_rate": 5.70990328259509e-06, "loss": 0.6064, "step": 7394 }, { "epoch": 0.94, "grad_norm": 0.6605308434412986, "learning_rate": 5.708882131833888e-06, "loss": 0.5117, "step": 7395 }, { "epoch": 0.94, "grad_norm": 1.0070714308613156, "learning_rate": 5.707860950897839e-06, "loss": 0.531, "step": 7396 }, { "epoch": 0.94, "grad_norm": 0.7500848913152038, "learning_rate": 5.706839739830411e-06, "loss": 0.5478, "step": 7397 }, { "epoch": 0.94, "grad_norm": 0.5618937124625564, "learning_rate": 5.705818498675074e-06, "loss": 0.4884, "step": 7398 }, { "epoch": 0.94, "grad_norm": 0.6870789565856263, "learning_rate": 5.704797227475299e-06, "loss": 0.4901, "step": 7399 }, { "epoch": 0.94, "grad_norm": 0.5456379918592325, "learning_rate": 5.703775926274559e-06, "loss": 0.5161, "step": 7400 }, { "epoch": 0.94, "grad_norm": 0.7181685879281244, "learning_rate": 5.702754595116325e-06, "loss": 0.6051, "step": 7401 }, { "epoch": 0.94, "grad_norm": 0.6961210746654559, "learning_rate": 5.701733234044075e-06, "loss": 0.5463, "step": 7402 }, { "epoch": 0.94, "grad_norm": 0.6056251106359543, "learning_rate": 5.700711843101283e-06, "loss": 0.5379, "step": 7403 }, { "epoch": 0.94, "grad_norm": 0.5872243201901712, "learning_rate": 5.699690422331426e-06, "loss": 0.5413, "step": 7404 }, { "epoch": 0.94, "grad_norm": 0.5540002123578797, "learning_rate": 5.698668971777985e-06, "loss": 0.503, "step": 7405 }, { "epoch": 0.94, "grad_norm": 0.8072134370319362, "learning_rate": 5.697647491484439e-06, "loss": 0.6224, "step": 7406 }, { "epoch": 0.94, "grad_norm": 0.5572745526912098, "learning_rate": 5.696625981494268e-06, "loss": 0.4906, "step": 7407 }, { "epoch": 0.94, "grad_norm": 0.6504237339968888, "learning_rate": 5.695604441850955e-06, "loss": 0.5197, "step": 7408 }, { "epoch": 0.94, "grad_norm": 0.5274329768422829, "learning_rate": 5.694582872597984e-06, "loss": 0.4954, "step": 7409 }, { "epoch": 0.94, "grad_norm": 0.928078603726346, "learning_rate": 5.69356127377884e-06, "loss": 0.6473, "step": 7410 }, { "epoch": 0.94, "grad_norm": 0.9812196709788339, "learning_rate": 5.692539645437009e-06, "loss": 0.6126, "step": 7411 }, { "epoch": 0.94, "grad_norm": 0.7582274940934607, "learning_rate": 5.691517987615976e-06, "loss": 0.6435, "step": 7412 }, { "epoch": 0.94, "grad_norm": 0.5944706589682655, "learning_rate": 5.690496300359234e-06, "loss": 0.5122, "step": 7413 }, { "epoch": 0.94, "grad_norm": 0.6105810228456057, "learning_rate": 5.68947458371027e-06, "loss": 0.5468, "step": 7414 }, { "epoch": 0.94, "grad_norm": 0.65134934033479, "learning_rate": 5.688452837712577e-06, "loss": 0.4929, "step": 7415 }, { "epoch": 0.94, "grad_norm": 0.9131277089851544, "learning_rate": 5.687431062409647e-06, "loss": 0.5851, "step": 7416 }, { "epoch": 0.94, "grad_norm": 0.8079190453130916, "learning_rate": 5.686409257844973e-06, "loss": 0.5972, "step": 7417 }, { "epoch": 0.95, "grad_norm": 0.8850021987073383, "learning_rate": 5.685387424062051e-06, "loss": 0.601, "step": 7418 }, { "epoch": 0.95, "grad_norm": 0.7905063975038278, "learning_rate": 5.684365561104375e-06, "loss": 0.6438, "step": 7419 }, { "epoch": 0.95, "grad_norm": 1.8213264804998532, "learning_rate": 5.683343669015444e-06, "loss": 0.5828, "step": 7420 }, { "epoch": 0.95, "grad_norm": 0.6870240698263178, "learning_rate": 5.682321747838758e-06, "loss": 0.5946, "step": 7421 }, { "epoch": 0.95, "grad_norm": 0.6722639333025503, "learning_rate": 5.681299797617815e-06, "loss": 0.5828, "step": 7422 }, { "epoch": 0.95, "grad_norm": 0.6818675552627741, "learning_rate": 5.680277818396117e-06, "loss": 0.5748, "step": 7423 }, { "epoch": 0.95, "grad_norm": 0.6275135559303036, "learning_rate": 5.679255810217167e-06, "loss": 0.4726, "step": 7424 }, { "epoch": 0.95, "grad_norm": 0.8163185683563445, "learning_rate": 5.678233773124465e-06, "loss": 0.6493, "step": 7425 }, { "epoch": 0.95, "grad_norm": 0.7144144806646856, "learning_rate": 5.6772117071615206e-06, "loss": 0.5964, "step": 7426 }, { "epoch": 0.95, "grad_norm": 0.7516934859178865, "learning_rate": 5.676189612371837e-06, "loss": 0.6067, "step": 7427 }, { "epoch": 0.95, "grad_norm": 0.5715928842235714, "learning_rate": 5.675167488798924e-06, "loss": 0.5164, "step": 7428 }, { "epoch": 0.95, "grad_norm": 0.8984798482547672, "learning_rate": 5.674145336486287e-06, "loss": 0.6277, "step": 7429 }, { "epoch": 0.95, "grad_norm": 0.9353574284200804, "learning_rate": 5.673123155477438e-06, "loss": 0.6345, "step": 7430 }, { "epoch": 0.95, "grad_norm": 0.6148041296681221, "learning_rate": 5.672100945815887e-06, "loss": 0.5372, "step": 7431 }, { "epoch": 0.95, "grad_norm": 0.9274117995181222, "learning_rate": 5.671078707545147e-06, "loss": 0.6141, "step": 7432 }, { "epoch": 0.95, "grad_norm": 1.2818584754791995, "learning_rate": 5.67005644070873e-06, "loss": 0.5977, "step": 7433 }, { "epoch": 0.95, "grad_norm": 0.5917544842531124, "learning_rate": 5.6690341453501515e-06, "loss": 0.5296, "step": 7434 }, { "epoch": 0.95, "grad_norm": 0.6038382946577495, "learning_rate": 5.668011821512929e-06, "loss": 0.4771, "step": 7435 }, { "epoch": 0.95, "grad_norm": 0.5992428069467104, "learning_rate": 5.666989469240576e-06, "loss": 0.5612, "step": 7436 }, { "epoch": 0.95, "grad_norm": 0.5447866721389806, "learning_rate": 5.665967088576613e-06, "loss": 0.4725, "step": 7437 }, { "epoch": 0.95, "grad_norm": 0.7557642963332849, "learning_rate": 5.664944679564559e-06, "loss": 0.5562, "step": 7438 }, { "epoch": 0.95, "grad_norm": 0.9271305379046939, "learning_rate": 5.663922242247936e-06, "loss": 0.6213, "step": 7439 }, { "epoch": 0.95, "grad_norm": 0.5811107774774571, "learning_rate": 5.6628997766702644e-06, "loss": 0.512, "step": 7440 }, { "epoch": 0.95, "grad_norm": 0.6768434141643868, "learning_rate": 5.6618772828750675e-06, "loss": 0.5404, "step": 7441 }, { "epoch": 0.95, "grad_norm": 0.5977941622466924, "learning_rate": 5.660854760905869e-06, "loss": 0.5048, "step": 7442 }, { "epoch": 0.95, "grad_norm": 0.763565937469353, "learning_rate": 5.659832210806195e-06, "loss": 0.6167, "step": 7443 }, { "epoch": 0.95, "grad_norm": 0.8373060044331139, "learning_rate": 5.6588096326195726e-06, "loss": 0.6458, "step": 7444 }, { "epoch": 0.95, "grad_norm": 0.6689128337283314, "learning_rate": 5.6577870263895306e-06, "loss": 0.4912, "step": 7445 }, { "epoch": 0.95, "grad_norm": 0.7595481738334218, "learning_rate": 5.6567643921595965e-06, "loss": 0.6323, "step": 7446 }, { "epoch": 0.95, "grad_norm": 0.5712844219189369, "learning_rate": 5.655741729973301e-06, "loss": 0.542, "step": 7447 }, { "epoch": 0.95, "grad_norm": 0.5786996986577434, "learning_rate": 5.654719039874175e-06, "loss": 0.4855, "step": 7448 }, { "epoch": 0.95, "grad_norm": 0.7366752618305401, "learning_rate": 5.653696321905752e-06, "loss": 0.6589, "step": 7449 }, { "epoch": 0.95, "grad_norm": 0.5312668299918883, "learning_rate": 5.6526735761115635e-06, "loss": 0.5062, "step": 7450 }, { "epoch": 0.95, "grad_norm": 0.8934322478513086, "learning_rate": 5.651650802535149e-06, "loss": 0.6418, "step": 7451 }, { "epoch": 0.95, "grad_norm": 0.6447074771875847, "learning_rate": 5.650628001220041e-06, "loss": 0.5331, "step": 7452 }, { "epoch": 0.95, "grad_norm": 0.7473002167035475, "learning_rate": 5.6496051722097785e-06, "loss": 0.5632, "step": 7453 }, { "epoch": 0.95, "grad_norm": 0.723231306865618, "learning_rate": 5.648582315547901e-06, "loss": 0.5889, "step": 7454 }, { "epoch": 0.95, "grad_norm": 0.7775924834948316, "learning_rate": 5.647559431277944e-06, "loss": 0.5167, "step": 7455 }, { "epoch": 0.95, "grad_norm": 0.7407817777605376, "learning_rate": 5.646536519443453e-06, "loss": 0.5996, "step": 7456 }, { "epoch": 0.95, "grad_norm": 0.6986294728385855, "learning_rate": 5.645513580087968e-06, "loss": 0.6367, "step": 7457 }, { "epoch": 0.95, "grad_norm": 0.597406972525196, "learning_rate": 5.644490613255034e-06, "loss": 0.5207, "step": 7458 }, { "epoch": 0.95, "grad_norm": 0.711716046917632, "learning_rate": 5.643467618988192e-06, "loss": 0.627, "step": 7459 }, { "epoch": 0.95, "grad_norm": 0.8041144330129631, "learning_rate": 5.642444597330992e-06, "loss": 0.5939, "step": 7460 }, { "epoch": 0.95, "grad_norm": 0.7501084350677372, "learning_rate": 5.6414215483269764e-06, "loss": 0.5278, "step": 7461 }, { "epoch": 0.95, "grad_norm": 0.6171344466764241, "learning_rate": 5.640398472019697e-06, "loss": 0.5095, "step": 7462 }, { "epoch": 0.95, "grad_norm": 0.6353046548901394, "learning_rate": 5.6393753684526995e-06, "loss": 0.5005, "step": 7463 }, { "epoch": 0.95, "grad_norm": 0.7233307148465343, "learning_rate": 5.638352237669537e-06, "loss": 0.5639, "step": 7464 }, { "epoch": 0.95, "grad_norm": 0.6222856119738878, "learning_rate": 5.637329079713758e-06, "loss": 0.4991, "step": 7465 }, { "epoch": 0.95, "grad_norm": 0.5494478442453682, "learning_rate": 5.636305894628917e-06, "loss": 0.4983, "step": 7466 }, { "epoch": 0.95, "grad_norm": 0.9094628837538682, "learning_rate": 5.635282682458568e-06, "loss": 0.6231, "step": 7467 }, { "epoch": 0.95, "grad_norm": 0.571839861934092, "learning_rate": 5.634259443246266e-06, "loss": 0.5163, "step": 7468 }, { "epoch": 0.95, "grad_norm": 0.7829450278475736, "learning_rate": 5.633236177035566e-06, "loss": 0.6332, "step": 7469 }, { "epoch": 0.95, "grad_norm": 0.9182791455086887, "learning_rate": 5.632212883870024e-06, "loss": 0.6257, "step": 7470 }, { "epoch": 0.95, "grad_norm": 0.6367248681491957, "learning_rate": 5.631189563793201e-06, "loss": 0.5602, "step": 7471 }, { "epoch": 0.95, "grad_norm": 0.592248131703522, "learning_rate": 5.630166216848656e-06, "loss": 0.5561, "step": 7472 }, { "epoch": 0.95, "grad_norm": 0.6855158527421498, "learning_rate": 5.629142843079948e-06, "loss": 0.6157, "step": 7473 }, { "epoch": 0.95, "grad_norm": 0.6711081524201122, "learning_rate": 5.6281194425306386e-06, "loss": 0.5747, "step": 7474 }, { "epoch": 0.95, "grad_norm": 0.9488468902943121, "learning_rate": 5.627096015244292e-06, "loss": 0.5873, "step": 7475 }, { "epoch": 0.95, "grad_norm": 0.5924103778816938, "learning_rate": 5.626072561264473e-06, "loss": 0.5409, "step": 7476 }, { "epoch": 0.95, "grad_norm": 0.6976615426673333, "learning_rate": 5.625049080634746e-06, "loss": 0.5064, "step": 7477 }, { "epoch": 0.95, "grad_norm": 0.7410077206632754, "learning_rate": 5.624025573398676e-06, "loss": 0.564, "step": 7478 }, { "epoch": 0.95, "grad_norm": 0.7758628136137053, "learning_rate": 5.623002039599832e-06, "loss": 0.5449, "step": 7479 }, { "epoch": 0.95, "grad_norm": 0.56500355210866, "learning_rate": 5.6219784792817825e-06, "loss": 0.536, "step": 7480 }, { "epoch": 0.95, "grad_norm": 0.8655068984818446, "learning_rate": 5.6209548924880954e-06, "loss": 0.6632, "step": 7481 }, { "epoch": 0.95, "grad_norm": 0.6461287796399382, "learning_rate": 5.619931279262343e-06, "loss": 0.5459, "step": 7482 }, { "epoch": 0.95, "grad_norm": 0.7727957998907783, "learning_rate": 5.618907639648098e-06, "loss": 0.6538, "step": 7483 }, { "epoch": 0.95, "grad_norm": 0.8576024726657329, "learning_rate": 5.617883973688931e-06, "loss": 0.6779, "step": 7484 }, { "epoch": 0.95, "grad_norm": 0.5699404090759821, "learning_rate": 5.61686028142842e-06, "loss": 0.5295, "step": 7485 }, { "epoch": 0.95, "grad_norm": 0.6045262952351791, "learning_rate": 5.615836562910136e-06, "loss": 0.4957, "step": 7486 }, { "epoch": 0.95, "grad_norm": 0.6796026296394505, "learning_rate": 5.614812818177657e-06, "loss": 0.558, "step": 7487 }, { "epoch": 0.95, "grad_norm": 0.7804945500216487, "learning_rate": 5.6137890472745625e-06, "loss": 0.6621, "step": 7488 }, { "epoch": 0.95, "grad_norm": 0.9389280388329666, "learning_rate": 5.61276525024443e-06, "loss": 0.6651, "step": 7489 }, { "epoch": 0.95, "grad_norm": 0.723619080503985, "learning_rate": 5.611741427130838e-06, "loss": 0.5922, "step": 7490 }, { "epoch": 0.95, "grad_norm": 1.4457476941116572, "learning_rate": 5.610717577977369e-06, "loss": 0.6387, "step": 7491 }, { "epoch": 0.95, "grad_norm": 0.7561310635183401, "learning_rate": 5.609693702827605e-06, "loss": 0.5484, "step": 7492 }, { "epoch": 0.95, "grad_norm": 0.6382758271178987, "learning_rate": 5.608669801725125e-06, "loss": 0.4749, "step": 7493 }, { "epoch": 0.95, "grad_norm": 0.659774268820042, "learning_rate": 5.607645874713519e-06, "loss": 0.5292, "step": 7494 }, { "epoch": 0.95, "grad_norm": 0.6203594822082383, "learning_rate": 5.60662192183637e-06, "loss": 0.5286, "step": 7495 }, { "epoch": 0.95, "grad_norm": 0.6710073324384463, "learning_rate": 5.605597943137264e-06, "loss": 0.5618, "step": 7496 }, { "epoch": 0.96, "grad_norm": 0.5949885700201625, "learning_rate": 5.6045739386597885e-06, "loss": 0.5116, "step": 7497 }, { "epoch": 0.96, "grad_norm": 0.6406614047485684, "learning_rate": 5.603549908447533e-06, "loss": 0.5583, "step": 7498 }, { "epoch": 0.96, "grad_norm": 0.5361097158972228, "learning_rate": 5.602525852544085e-06, "loss": 0.4929, "step": 7499 }, { "epoch": 0.96, "grad_norm": 0.7498696685528496, "learning_rate": 5.6015017709930385e-06, "loss": 0.6039, "step": 7500 }, { "epoch": 0.96, "grad_norm": 0.8061247815788423, "learning_rate": 5.600477663837983e-06, "loss": 0.5824, "step": 7501 }, { "epoch": 0.96, "grad_norm": 0.5361879481965269, "learning_rate": 5.599453531122513e-06, "loss": 0.5044, "step": 7502 }, { "epoch": 0.96, "grad_norm": 0.6590883410830884, "learning_rate": 5.5984293728902205e-06, "loss": 0.5061, "step": 7503 }, { "epoch": 0.96, "grad_norm": 0.6569733490302738, "learning_rate": 5.597405189184702e-06, "loss": 0.5126, "step": 7504 }, { "epoch": 0.96, "grad_norm": 0.5854814873595723, "learning_rate": 5.5963809800495535e-06, "loss": 0.55, "step": 7505 }, { "epoch": 0.96, "grad_norm": 0.6750277894763069, "learning_rate": 5.595356745528374e-06, "loss": 0.5024, "step": 7506 }, { "epoch": 0.96, "grad_norm": 0.5940732070440541, "learning_rate": 5.59433248566476e-06, "loss": 0.5455, "step": 7507 }, { "epoch": 0.96, "grad_norm": 0.6332071317074104, "learning_rate": 5.593308200502311e-06, "loss": 0.5383, "step": 7508 }, { "epoch": 0.96, "grad_norm": 0.79302800554713, "learning_rate": 5.5922838900846275e-06, "loss": 0.5952, "step": 7509 }, { "epoch": 0.96, "grad_norm": 0.757567081335012, "learning_rate": 5.591259554455311e-06, "loss": 0.6282, "step": 7510 }, { "epoch": 0.96, "grad_norm": 0.8590191699846675, "learning_rate": 5.590235193657965e-06, "loss": 0.6224, "step": 7511 }, { "epoch": 0.96, "grad_norm": 0.7424039259481556, "learning_rate": 5.5892108077361925e-06, "loss": 0.5549, "step": 7512 }, { "epoch": 0.96, "grad_norm": 0.6279115641151283, "learning_rate": 5.5881863967336e-06, "loss": 0.547, "step": 7513 }, { "epoch": 0.96, "grad_norm": 0.7931095274600247, "learning_rate": 5.5871619606937925e-06, "loss": 0.625, "step": 7514 }, { "epoch": 0.96, "grad_norm": 0.6995865900119276, "learning_rate": 5.586137499660376e-06, "loss": 0.6061, "step": 7515 }, { "epoch": 0.96, "grad_norm": 0.6616400129795555, "learning_rate": 5.585113013676958e-06, "loss": 0.586, "step": 7516 }, { "epoch": 0.96, "grad_norm": 0.8003756150682517, "learning_rate": 5.584088502787151e-06, "loss": 0.5592, "step": 7517 }, { "epoch": 0.96, "grad_norm": 0.7594131286486338, "learning_rate": 5.58306396703456e-06, "loss": 0.5948, "step": 7518 }, { "epoch": 0.96, "grad_norm": 0.701980554897668, "learning_rate": 5.5820394064628e-06, "loss": 0.609, "step": 7519 }, { "epoch": 0.96, "grad_norm": 0.6053234960436575, "learning_rate": 5.5810148211154835e-06, "loss": 0.4991, "step": 7520 }, { "epoch": 0.96, "grad_norm": 0.8351394011794444, "learning_rate": 5.579990211036223e-06, "loss": 0.6658, "step": 7521 }, { "epoch": 0.96, "grad_norm": 0.6514037457401015, "learning_rate": 5.578965576268632e-06, "loss": 0.5554, "step": 7522 }, { "epoch": 0.96, "grad_norm": 0.6118776643235391, "learning_rate": 5.577940916856326e-06, "loss": 0.504, "step": 7523 }, { "epoch": 0.96, "grad_norm": 0.6012276938343325, "learning_rate": 5.576916232842923e-06, "loss": 0.4909, "step": 7524 }, { "epoch": 0.96, "grad_norm": 0.6231388550961715, "learning_rate": 5.5758915242720395e-06, "loss": 0.5479, "step": 7525 }, { "epoch": 0.96, "grad_norm": 0.7024457676788421, "learning_rate": 5.5748667911872935e-06, "loss": 0.5909, "step": 7526 }, { "epoch": 0.96, "grad_norm": 0.6972453079687018, "learning_rate": 5.573842033632305e-06, "loss": 0.6119, "step": 7527 }, { "epoch": 0.96, "grad_norm": 0.7642496430911038, "learning_rate": 5.572817251650695e-06, "loss": 0.6192, "step": 7528 }, { "epoch": 0.96, "grad_norm": 0.8392541463793172, "learning_rate": 5.571792445286085e-06, "loss": 0.6536, "step": 7529 }, { "epoch": 0.96, "grad_norm": 0.5518529616944446, "learning_rate": 5.570767614582098e-06, "loss": 0.5169, "step": 7530 }, { "epoch": 0.96, "grad_norm": 0.8068960512228883, "learning_rate": 5.569742759582357e-06, "loss": 0.6492, "step": 7531 }, { "epoch": 0.96, "grad_norm": 0.8396541307872485, "learning_rate": 5.568717880330489e-06, "loss": 0.6041, "step": 7532 }, { "epoch": 0.96, "grad_norm": 0.6797223660958093, "learning_rate": 5.567692976870117e-06, "loss": 0.584, "step": 7533 }, { "epoch": 0.96, "grad_norm": 0.698770788248921, "learning_rate": 5.56666804924487e-06, "loss": 0.5687, "step": 7534 }, { "epoch": 0.96, "grad_norm": 0.7028067449758266, "learning_rate": 5.5656430974983735e-06, "loss": 0.6244, "step": 7535 }, { "epoch": 0.96, "grad_norm": 0.8473567542161662, "learning_rate": 5.564618121674258e-06, "loss": 0.5934, "step": 7536 }, { "epoch": 0.96, "grad_norm": 0.5850820347804717, "learning_rate": 5.563593121816155e-06, "loss": 0.5086, "step": 7537 }, { "epoch": 0.96, "grad_norm": 0.6427982414322996, "learning_rate": 5.562568097967691e-06, "loss": 0.6103, "step": 7538 }, { "epoch": 0.96, "grad_norm": 0.6724865274200578, "learning_rate": 5.5615430501725035e-06, "loss": 0.5032, "step": 7539 }, { "epoch": 0.96, "grad_norm": 0.652842206270668, "learning_rate": 5.560517978474222e-06, "loss": 0.4933, "step": 7540 }, { "epoch": 0.96, "grad_norm": 0.6119030785598977, "learning_rate": 5.5594928829164804e-06, "loss": 0.501, "step": 7541 }, { "epoch": 0.96, "grad_norm": 0.8612807450827021, "learning_rate": 5.558467763542916e-06, "loss": 0.5677, "step": 7542 }, { "epoch": 0.96, "grad_norm": 0.5175732781417085, "learning_rate": 5.557442620397162e-06, "loss": 0.4908, "step": 7543 }, { "epoch": 0.96, "grad_norm": 0.7201772136975116, "learning_rate": 5.556417453522858e-06, "loss": 0.5323, "step": 7544 }, { "epoch": 0.96, "grad_norm": 0.6234394421718971, "learning_rate": 5.55539226296364e-06, "loss": 0.5189, "step": 7545 }, { "epoch": 0.96, "grad_norm": 0.624635148078209, "learning_rate": 5.554367048763149e-06, "loss": 0.5214, "step": 7546 }, { "epoch": 0.96, "grad_norm": 0.5900965532139819, "learning_rate": 5.553341810965025e-06, "loss": 0.5515, "step": 7547 }, { "epoch": 0.96, "grad_norm": 0.7721391585343826, "learning_rate": 5.552316549612907e-06, "loss": 0.6673, "step": 7548 }, { "epoch": 0.96, "grad_norm": 0.7244636016195506, "learning_rate": 5.551291264750439e-06, "loss": 0.5335, "step": 7549 }, { "epoch": 0.96, "grad_norm": 0.764917421470679, "learning_rate": 5.550265956421263e-06, "loss": 0.6519, "step": 7550 }, { "epoch": 0.96, "grad_norm": 0.6327877244351915, "learning_rate": 5.549240624669025e-06, "loss": 0.5064, "step": 7551 }, { "epoch": 0.96, "grad_norm": 0.6637650168820303, "learning_rate": 5.548215269537368e-06, "loss": 0.5701, "step": 7552 }, { "epoch": 0.96, "grad_norm": 0.6466215346331949, "learning_rate": 5.5471898910699385e-06, "loss": 0.5233, "step": 7553 }, { "epoch": 0.96, "grad_norm": 0.7797419064637255, "learning_rate": 5.546164489310384e-06, "loss": 0.6099, "step": 7554 }, { "epoch": 0.96, "grad_norm": 0.6021766400263207, "learning_rate": 5.545139064302352e-06, "loss": 0.5427, "step": 7555 }, { "epoch": 0.96, "grad_norm": 0.640741001885347, "learning_rate": 5.544113616089493e-06, "loss": 0.5065, "step": 7556 }, { "epoch": 0.96, "grad_norm": 1.2158678722939376, "learning_rate": 5.543088144715455e-06, "loss": 0.6242, "step": 7557 }, { "epoch": 0.96, "grad_norm": 0.9070171850791727, "learning_rate": 5.542062650223892e-06, "loss": 0.6381, "step": 7558 }, { "epoch": 0.96, "grad_norm": 0.6833671135940843, "learning_rate": 5.5410371326584525e-06, "loss": 0.5495, "step": 7559 }, { "epoch": 0.96, "grad_norm": 0.7459351868089814, "learning_rate": 5.540011592062791e-06, "loss": 0.5814, "step": 7560 }, { "epoch": 0.96, "grad_norm": 0.8477969766915834, "learning_rate": 5.538986028480563e-06, "loss": 0.6157, "step": 7561 }, { "epoch": 0.96, "grad_norm": 0.664010709265732, "learning_rate": 5.537960441955421e-06, "loss": 0.5162, "step": 7562 }, { "epoch": 0.96, "grad_norm": 0.603346770470461, "learning_rate": 5.536934832531022e-06, "loss": 0.5331, "step": 7563 }, { "epoch": 0.96, "grad_norm": 0.7180540870225373, "learning_rate": 5.535909200251024e-06, "loss": 0.6083, "step": 7564 }, { "epoch": 0.96, "grad_norm": 0.5268191736598359, "learning_rate": 5.534883545159085e-06, "loss": 0.4378, "step": 7565 }, { "epoch": 0.96, "grad_norm": 0.648713835068757, "learning_rate": 5.5338578672988606e-06, "loss": 0.5791, "step": 7566 }, { "epoch": 0.96, "grad_norm": 0.5553914496705349, "learning_rate": 5.532832166714011e-06, "loss": 0.519, "step": 7567 }, { "epoch": 0.96, "grad_norm": 0.6280371686105825, "learning_rate": 5.531806443448202e-06, "loss": 0.5551, "step": 7568 }, { "epoch": 0.96, "grad_norm": 0.6024276775023125, "learning_rate": 5.530780697545091e-06, "loss": 0.5464, "step": 7569 }, { "epoch": 0.96, "grad_norm": 0.6414987537819092, "learning_rate": 5.529754929048342e-06, "loss": 0.5025, "step": 7570 }, { "epoch": 0.96, "grad_norm": 0.6149237158979458, "learning_rate": 5.528729138001619e-06, "loss": 0.4912, "step": 7571 }, { "epoch": 0.96, "grad_norm": 0.5677021856388555, "learning_rate": 5.5277033244485855e-06, "loss": 0.4997, "step": 7572 }, { "epoch": 0.96, "grad_norm": 0.7354021725784786, "learning_rate": 5.526677488432909e-06, "loss": 0.611, "step": 7573 }, { "epoch": 0.96, "grad_norm": 0.6210531429893793, "learning_rate": 5.525651629998252e-06, "loss": 0.522, "step": 7574 }, { "epoch": 0.97, "grad_norm": 0.585476354979317, "learning_rate": 5.5246257491882875e-06, "loss": 0.5378, "step": 7575 }, { "epoch": 0.97, "grad_norm": 0.6296899660493586, "learning_rate": 5.5235998460466796e-06, "loss": 0.5058, "step": 7576 }, { "epoch": 0.97, "grad_norm": 0.591274840977478, "learning_rate": 5.522573920617101e-06, "loss": 0.5376, "step": 7577 }, { "epoch": 0.97, "grad_norm": 0.5831344897862408, "learning_rate": 5.52154797294322e-06, "loss": 0.5258, "step": 7578 }, { "epoch": 0.97, "grad_norm": 1.0086384210930563, "learning_rate": 5.520522003068709e-06, "loss": 0.5897, "step": 7579 }, { "epoch": 0.97, "grad_norm": 0.8012128775170618, "learning_rate": 5.5194960110372376e-06, "loss": 0.6236, "step": 7580 }, { "epoch": 0.97, "grad_norm": 0.6149023152075509, "learning_rate": 5.518469996892483e-06, "loss": 0.576, "step": 7581 }, { "epoch": 0.97, "grad_norm": 0.6130289171762597, "learning_rate": 5.517443960678116e-06, "loss": 0.5589, "step": 7582 }, { "epoch": 0.97, "grad_norm": 0.5702572281951223, "learning_rate": 5.516417902437814e-06, "loss": 0.5385, "step": 7583 }, { "epoch": 0.97, "grad_norm": 0.6507560903497862, "learning_rate": 5.515391822215252e-06, "loss": 0.4782, "step": 7584 }, { "epoch": 0.97, "grad_norm": 0.7516982480369816, "learning_rate": 5.514365720054108e-06, "loss": 0.5578, "step": 7585 }, { "epoch": 0.97, "grad_norm": 0.5204808405538552, "learning_rate": 5.513339595998058e-06, "loss": 0.4889, "step": 7586 }, { "epoch": 0.97, "grad_norm": 0.6956055398196305, "learning_rate": 5.5123134500907825e-06, "loss": 0.5202, "step": 7587 }, { "epoch": 0.97, "grad_norm": 0.8328162300187069, "learning_rate": 5.51128728237596e-06, "loss": 0.5557, "step": 7588 }, { "epoch": 0.97, "grad_norm": 0.9534275043609638, "learning_rate": 5.5102610928972725e-06, "loss": 0.555, "step": 7589 }, { "epoch": 0.97, "grad_norm": 0.579336801661149, "learning_rate": 5.5092348816983985e-06, "loss": 0.5375, "step": 7590 }, { "epoch": 0.97, "grad_norm": 0.9391063810120132, "learning_rate": 5.508208648823025e-06, "loss": 0.5938, "step": 7591 }, { "epoch": 0.97, "grad_norm": 0.9695965226750517, "learning_rate": 5.507182394314832e-06, "loss": 0.6178, "step": 7592 }, { "epoch": 0.97, "grad_norm": 0.9005124611626333, "learning_rate": 5.506156118217506e-06, "loss": 0.6008, "step": 7593 }, { "epoch": 0.97, "grad_norm": 0.48799709277974174, "learning_rate": 5.5051298205747315e-06, "loss": 0.4413, "step": 7594 }, { "epoch": 0.97, "grad_norm": 0.7910818204108412, "learning_rate": 5.504103501430195e-06, "loss": 0.5167, "step": 7595 }, { "epoch": 0.97, "grad_norm": 0.5549774493898005, "learning_rate": 5.503077160827583e-06, "loss": 0.5302, "step": 7596 }, { "epoch": 0.97, "grad_norm": 0.8722196089232908, "learning_rate": 5.502050798810584e-06, "loss": 0.6307, "step": 7597 }, { "epoch": 0.97, "grad_norm": 0.7201682416213975, "learning_rate": 5.501024415422885e-06, "loss": 0.6432, "step": 7598 }, { "epoch": 0.97, "grad_norm": 0.8569120142915779, "learning_rate": 5.499998010708181e-06, "loss": 0.5749, "step": 7599 }, { "epoch": 0.97, "grad_norm": 0.7118726448812661, "learning_rate": 5.498971584710158e-06, "loss": 0.5539, "step": 7600 }, { "epoch": 0.97, "grad_norm": 0.596970360923443, "learning_rate": 5.497945137472508e-06, "loss": 0.5225, "step": 7601 }, { "epoch": 0.97, "grad_norm": 0.8129042638483244, "learning_rate": 5.496918669038927e-06, "loss": 0.6055, "step": 7602 }, { "epoch": 0.97, "grad_norm": 0.8967171840855497, "learning_rate": 5.495892179453104e-06, "loss": 0.5558, "step": 7603 }, { "epoch": 0.97, "grad_norm": 0.8305055714431683, "learning_rate": 5.494865668758737e-06, "loss": 0.5358, "step": 7604 }, { "epoch": 0.97, "grad_norm": 0.6550216912099499, "learning_rate": 5.493839136999517e-06, "loss": 0.5422, "step": 7605 }, { "epoch": 0.97, "grad_norm": 0.5836326198558194, "learning_rate": 5.492812584219146e-06, "loss": 0.4814, "step": 7606 }, { "epoch": 0.97, "grad_norm": 0.6999942862132048, "learning_rate": 5.491786010461317e-06, "loss": 0.5101, "step": 7607 }, { "epoch": 0.97, "grad_norm": 0.5963761150255326, "learning_rate": 5.49075941576973e-06, "loss": 0.5577, "step": 7608 }, { "epoch": 0.97, "grad_norm": 0.5399962748439829, "learning_rate": 5.4897328001880825e-06, "loss": 0.494, "step": 7609 }, { "epoch": 0.97, "grad_norm": 0.8377025018325649, "learning_rate": 5.488706163760075e-06, "loss": 0.6595, "step": 7610 }, { "epoch": 0.97, "grad_norm": 0.6355270949141507, "learning_rate": 5.487679506529405e-06, "loss": 0.5234, "step": 7611 }, { "epoch": 0.97, "grad_norm": 0.8021011466859977, "learning_rate": 5.48665282853978e-06, "loss": 0.6364, "step": 7612 }, { "epoch": 0.97, "grad_norm": 0.6488854554999794, "learning_rate": 5.485626129834898e-06, "loss": 0.5075, "step": 7613 }, { "epoch": 0.97, "grad_norm": 0.5743535463440743, "learning_rate": 5.484599410458464e-06, "loss": 0.5091, "step": 7614 }, { "epoch": 0.97, "grad_norm": 0.5576089792025481, "learning_rate": 5.483572670454181e-06, "loss": 0.4852, "step": 7615 }, { "epoch": 0.97, "grad_norm": 0.5250051752808631, "learning_rate": 5.482545909865755e-06, "loss": 0.533, "step": 7616 }, { "epoch": 0.97, "grad_norm": 0.6262217878642798, "learning_rate": 5.4815191287368914e-06, "loss": 0.5604, "step": 7617 }, { "epoch": 0.97, "grad_norm": 0.5474155704323684, "learning_rate": 5.480492327111298e-06, "loss": 0.5137, "step": 7618 }, { "epoch": 0.97, "grad_norm": 0.603400583681847, "learning_rate": 5.4794655050326806e-06, "loss": 0.5085, "step": 7619 }, { "epoch": 0.97, "grad_norm": 0.7122199404116781, "learning_rate": 5.4784386625447495e-06, "loss": 0.5079, "step": 7620 }, { "epoch": 0.97, "grad_norm": 0.5957349064292433, "learning_rate": 5.477411799691213e-06, "loss": 0.5013, "step": 7621 }, { "epoch": 0.97, "grad_norm": 0.5806603744617725, "learning_rate": 5.476384916515783e-06, "loss": 0.5218, "step": 7622 }, { "epoch": 0.97, "grad_norm": 0.7985190895949581, "learning_rate": 5.475358013062167e-06, "loss": 0.6171, "step": 7623 }, { "epoch": 0.97, "grad_norm": 0.8152396987787083, "learning_rate": 5.474331089374081e-06, "loss": 0.606, "step": 7624 }, { "epoch": 0.97, "grad_norm": 0.7558770416267557, "learning_rate": 5.473304145495236e-06, "loss": 0.5792, "step": 7625 }, { "epoch": 0.97, "grad_norm": 0.6950817632840202, "learning_rate": 5.472277181469346e-06, "loss": 0.5081, "step": 7626 }, { "epoch": 0.97, "grad_norm": 0.7726956635313686, "learning_rate": 5.471250197340127e-06, "loss": 0.6511, "step": 7627 }, { "epoch": 0.97, "grad_norm": 0.5827437905910543, "learning_rate": 5.4702231931512914e-06, "loss": 0.4755, "step": 7628 }, { "epoch": 0.97, "grad_norm": 0.6035578999242772, "learning_rate": 5.4691961689465565e-06, "loss": 0.4976, "step": 7629 }, { "epoch": 0.97, "grad_norm": 0.8993407079877661, "learning_rate": 5.468169124769641e-06, "loss": 0.6566, "step": 7630 }, { "epoch": 0.97, "grad_norm": 0.568348462891462, "learning_rate": 5.467142060664262e-06, "loss": 0.5087, "step": 7631 }, { "epoch": 0.97, "grad_norm": 0.9491565850373038, "learning_rate": 5.4661149766741385e-06, "loss": 0.6116, "step": 7632 }, { "epoch": 0.97, "grad_norm": 0.6942050574064412, "learning_rate": 5.465087872842989e-06, "loss": 0.5743, "step": 7633 }, { "epoch": 0.97, "grad_norm": 0.5662075550149903, "learning_rate": 5.464060749214536e-06, "loss": 0.5048, "step": 7634 }, { "epoch": 0.97, "grad_norm": 0.878023432634221, "learning_rate": 5.463033605832499e-06, "loss": 0.5795, "step": 7635 }, { "epoch": 0.97, "grad_norm": 0.6256919852216055, "learning_rate": 5.4620064427406005e-06, "loss": 0.5291, "step": 7636 }, { "epoch": 0.97, "grad_norm": 0.683820273611286, "learning_rate": 5.460979259982565e-06, "loss": 0.544, "step": 7637 }, { "epoch": 0.97, "grad_norm": 0.6462524444088971, "learning_rate": 5.459952057602116e-06, "loss": 0.5134, "step": 7638 }, { "epoch": 0.97, "grad_norm": 0.6712443709615155, "learning_rate": 5.458924835642977e-06, "loss": 0.5097, "step": 7639 }, { "epoch": 0.97, "grad_norm": 0.740364420120419, "learning_rate": 5.457897594148876e-06, "loss": 0.5932, "step": 7640 }, { "epoch": 0.97, "grad_norm": 0.5446827282718025, "learning_rate": 5.456870333163535e-06, "loss": 0.4669, "step": 7641 }, { "epoch": 0.97, "grad_norm": 0.6786201454537524, "learning_rate": 5.455843052730684e-06, "loss": 0.5739, "step": 7642 }, { "epoch": 0.97, "grad_norm": 0.9191674631580304, "learning_rate": 5.454815752894051e-06, "loss": 0.6399, "step": 7643 }, { "epoch": 0.97, "grad_norm": 0.7453559798957378, "learning_rate": 5.4537884336973665e-06, "loss": 0.5744, "step": 7644 }, { "epoch": 0.97, "grad_norm": 0.6533767995417032, "learning_rate": 5.452761095184358e-06, "loss": 0.5364, "step": 7645 }, { "epoch": 0.97, "grad_norm": 0.7773539641585071, "learning_rate": 5.451733737398756e-06, "loss": 0.6201, "step": 7646 }, { "epoch": 0.97, "grad_norm": 0.6534543970475598, "learning_rate": 5.4507063603842924e-06, "loss": 0.536, "step": 7647 }, { "epoch": 0.97, "grad_norm": 0.6208033437849065, "learning_rate": 5.449678964184699e-06, "loss": 0.5493, "step": 7648 }, { "epoch": 0.97, "grad_norm": 0.9606060366491064, "learning_rate": 5.44865154884371e-06, "loss": 0.6055, "step": 7649 }, { "epoch": 0.97, "grad_norm": 0.7554072674016378, "learning_rate": 5.447624114405058e-06, "loss": 0.5892, "step": 7650 }, { "epoch": 0.97, "grad_norm": 0.7577052782835538, "learning_rate": 5.446596660912478e-06, "loss": 0.5999, "step": 7651 }, { "epoch": 0.97, "grad_norm": 0.7184803457220917, "learning_rate": 5.445569188409703e-06, "loss": 0.6636, "step": 7652 }, { "epoch": 0.97, "grad_norm": 0.7554933362400414, "learning_rate": 5.444541696940473e-06, "loss": 0.6109, "step": 7653 }, { "epoch": 0.98, "grad_norm": 0.6525645721219876, "learning_rate": 5.443514186548524e-06, "loss": 0.5233, "step": 7654 }, { "epoch": 0.98, "grad_norm": 0.8286323701669247, "learning_rate": 5.442486657277593e-06, "loss": 0.6284, "step": 7655 }, { "epoch": 0.98, "grad_norm": 0.6592548567264946, "learning_rate": 5.44145910917142e-06, "loss": 0.6048, "step": 7656 }, { "epoch": 0.98, "grad_norm": 0.5783620509608063, "learning_rate": 5.440431542273742e-06, "loss": 0.4733, "step": 7657 }, { "epoch": 0.98, "grad_norm": 0.636704869243039, "learning_rate": 5.439403956628302e-06, "loss": 0.5589, "step": 7658 }, { "epoch": 0.98, "grad_norm": 0.6829496157119364, "learning_rate": 5.438376352278839e-06, "loss": 0.4903, "step": 7659 }, { "epoch": 0.98, "grad_norm": 0.5822075259785565, "learning_rate": 5.437348729269094e-06, "loss": 0.5102, "step": 7660 }, { "epoch": 0.98, "grad_norm": 0.7770867537312597, "learning_rate": 5.436321087642813e-06, "loss": 0.5721, "step": 7661 }, { "epoch": 0.98, "grad_norm": 0.7628500114411533, "learning_rate": 5.435293427443737e-06, "loss": 0.6139, "step": 7662 }, { "epoch": 0.98, "grad_norm": 1.0242763930911243, "learning_rate": 5.434265748715611e-06, "loss": 0.6165, "step": 7663 }, { "epoch": 0.98, "grad_norm": 0.8831931861429525, "learning_rate": 5.4332380515021796e-06, "loss": 0.5952, "step": 7664 }, { "epoch": 0.98, "grad_norm": 0.9927954757870369, "learning_rate": 5.432210335847189e-06, "loss": 0.6282, "step": 7665 }, { "epoch": 0.98, "grad_norm": 0.6387157661274618, "learning_rate": 5.431182601794386e-06, "loss": 0.5411, "step": 7666 }, { "epoch": 0.98, "grad_norm": 0.8188858012671504, "learning_rate": 5.430154849387515e-06, "loss": 0.5674, "step": 7667 }, { "epoch": 0.98, "grad_norm": 0.6197312203439811, "learning_rate": 5.42912707867033e-06, "loss": 0.4704, "step": 7668 }, { "epoch": 0.98, "grad_norm": 0.6677575345490049, "learning_rate": 5.4280992896865744e-06, "loss": 0.5566, "step": 7669 }, { "epoch": 0.98, "grad_norm": 0.5428399145339392, "learning_rate": 5.427071482480001e-06, "loss": 0.4867, "step": 7670 }, { "epoch": 0.98, "grad_norm": 0.763064482947748, "learning_rate": 5.42604365709436e-06, "loss": 0.5985, "step": 7671 }, { "epoch": 0.98, "grad_norm": 0.6334282403203839, "learning_rate": 5.425015813573401e-06, "loss": 0.5388, "step": 7672 }, { "epoch": 0.98, "grad_norm": 0.865313502163088, "learning_rate": 5.423987951960876e-06, "loss": 0.6409, "step": 7673 }, { "epoch": 0.98, "grad_norm": 0.6051889533901729, "learning_rate": 5.42296007230054e-06, "loss": 0.5532, "step": 7674 }, { "epoch": 0.98, "grad_norm": 1.20833480018582, "learning_rate": 5.421932174636145e-06, "loss": 0.6208, "step": 7675 }, { "epoch": 0.98, "grad_norm": 0.5883651794021639, "learning_rate": 5.4209042590114455e-06, "loss": 0.5447, "step": 7676 }, { "epoch": 0.98, "grad_norm": 0.7698686865402535, "learning_rate": 5.419876325470197e-06, "loss": 0.5948, "step": 7677 }, { "epoch": 0.98, "grad_norm": 0.7493995078027337, "learning_rate": 5.418848374056156e-06, "loss": 0.625, "step": 7678 }, { "epoch": 0.98, "grad_norm": 0.696608713288634, "learning_rate": 5.417820404813075e-06, "loss": 0.593, "step": 7679 }, { "epoch": 0.98, "grad_norm": 0.5739480096523812, "learning_rate": 5.416792417784718e-06, "loss": 0.4881, "step": 7680 }, { "epoch": 0.98, "grad_norm": 0.6759667918026931, "learning_rate": 5.415764413014838e-06, "loss": 0.5221, "step": 7681 }, { "epoch": 0.98, "grad_norm": 0.7777323902892049, "learning_rate": 5.414736390547196e-06, "loss": 0.6289, "step": 7682 }, { "epoch": 0.98, "grad_norm": 0.7779993246484698, "learning_rate": 5.4137083504255506e-06, "loss": 0.656, "step": 7683 }, { "epoch": 0.98, "grad_norm": 0.7173564828586574, "learning_rate": 5.412680292693664e-06, "loss": 0.6188, "step": 7684 }, { "epoch": 0.98, "grad_norm": 0.7379808975637764, "learning_rate": 5.411652217395294e-06, "loss": 0.525, "step": 7685 }, { "epoch": 0.98, "grad_norm": 0.5837568082783187, "learning_rate": 5.410624124574206e-06, "loss": 0.5123, "step": 7686 }, { "epoch": 0.98, "grad_norm": 0.7511021320766276, "learning_rate": 5.409596014274161e-06, "loss": 0.5789, "step": 7687 }, { "epoch": 0.98, "grad_norm": 0.8279033722935091, "learning_rate": 5.408567886538922e-06, "loss": 0.6342, "step": 7688 }, { "epoch": 0.98, "grad_norm": 0.8876551816268368, "learning_rate": 5.4075397414122555e-06, "loss": 0.5917, "step": 7689 }, { "epoch": 0.98, "grad_norm": 0.7098891922718354, "learning_rate": 5.406511578937923e-06, "loss": 0.6053, "step": 7690 }, { "epoch": 0.98, "grad_norm": 0.8329051936661772, "learning_rate": 5.405483399159691e-06, "loss": 0.6319, "step": 7691 }, { "epoch": 0.98, "grad_norm": 0.8163348430392673, "learning_rate": 5.404455202121327e-06, "loss": 0.6229, "step": 7692 }, { "epoch": 0.98, "grad_norm": 0.9204914237534364, "learning_rate": 5.403426987866598e-06, "loss": 0.65, "step": 7693 }, { "epoch": 0.98, "grad_norm": 0.5903937899204389, "learning_rate": 5.402398756439271e-06, "loss": 0.5153, "step": 7694 }, { "epoch": 0.98, "grad_norm": 0.9063080203721176, "learning_rate": 5.401370507883116e-06, "loss": 0.6502, "step": 7695 }, { "epoch": 0.98, "grad_norm": 0.7562293817119972, "learning_rate": 5.400342242241899e-06, "loss": 0.6432, "step": 7696 }, { "epoch": 0.98, "grad_norm": 0.6165267075100698, "learning_rate": 5.399313959559394e-06, "loss": 0.519, "step": 7697 }, { "epoch": 0.98, "grad_norm": 0.5547820862455214, "learning_rate": 5.398285659879368e-06, "loss": 0.5045, "step": 7698 }, { "epoch": 0.98, "grad_norm": 0.791158681260808, "learning_rate": 5.397257343245595e-06, "loss": 0.6144, "step": 7699 }, { "epoch": 0.98, "grad_norm": 0.5458784560907478, "learning_rate": 5.396229009701846e-06, "loss": 0.5146, "step": 7700 }, { "epoch": 0.98, "grad_norm": 0.7107460281972412, "learning_rate": 5.395200659291895e-06, "loss": 0.607, "step": 7701 }, { "epoch": 0.98, "grad_norm": 0.9041061139278497, "learning_rate": 5.394172292059514e-06, "loss": 0.6222, "step": 7702 }, { "epoch": 0.98, "grad_norm": 0.5433163520556715, "learning_rate": 5.39314390804848e-06, "loss": 0.4533, "step": 7703 }, { "epoch": 0.98, "grad_norm": 0.6791522029686936, "learning_rate": 5.392115507302562e-06, "loss": 0.5712, "step": 7704 }, { "epoch": 0.98, "grad_norm": 0.8935288663499319, "learning_rate": 5.391087089865543e-06, "loss": 0.5653, "step": 7705 }, { "epoch": 0.98, "grad_norm": 0.6239536678147045, "learning_rate": 5.390058655781196e-06, "loss": 0.5159, "step": 7706 }, { "epoch": 0.98, "grad_norm": 0.7265561882945888, "learning_rate": 5.389030205093299e-06, "loss": 0.5767, "step": 7707 }, { "epoch": 0.98, "grad_norm": 0.7770451388491456, "learning_rate": 5.388001737845628e-06, "loss": 0.575, "step": 7708 }, { "epoch": 0.98, "grad_norm": 0.760367495109844, "learning_rate": 5.386973254081964e-06, "loss": 0.6234, "step": 7709 }, { "epoch": 0.98, "grad_norm": 0.773868965923522, "learning_rate": 5.3859447538460855e-06, "loss": 0.55, "step": 7710 }, { "epoch": 0.98, "grad_norm": 0.5700509815885804, "learning_rate": 5.384916237181771e-06, "loss": 0.4986, "step": 7711 }, { "epoch": 0.98, "grad_norm": 0.5711033557274912, "learning_rate": 5.3838877041328034e-06, "loss": 0.5527, "step": 7712 }, { "epoch": 0.98, "grad_norm": 0.9728050219099901, "learning_rate": 5.382859154742962e-06, "loss": 0.6433, "step": 7713 }, { "epoch": 0.98, "grad_norm": 0.682096310741079, "learning_rate": 5.3818305890560285e-06, "loss": 0.5737, "step": 7714 }, { "epoch": 0.98, "grad_norm": 0.7709356162527241, "learning_rate": 5.380802007115788e-06, "loss": 0.6538, "step": 7715 }, { "epoch": 0.98, "grad_norm": 0.701122406915414, "learning_rate": 5.379773408966025e-06, "loss": 0.5553, "step": 7716 }, { "epoch": 0.98, "grad_norm": 0.9142161363093375, "learning_rate": 5.37874479465052e-06, "loss": 0.5796, "step": 7717 }, { "epoch": 0.98, "grad_norm": 0.7206385256700651, "learning_rate": 5.377716164213059e-06, "loss": 0.5651, "step": 7718 }, { "epoch": 0.98, "grad_norm": 0.7496814463963307, "learning_rate": 5.376687517697428e-06, "loss": 0.5984, "step": 7719 }, { "epoch": 0.98, "grad_norm": 0.7629454310965097, "learning_rate": 5.375658855147415e-06, "loss": 0.664, "step": 7720 }, { "epoch": 0.98, "grad_norm": 0.7155635146549625, "learning_rate": 5.374630176606802e-06, "loss": 0.5433, "step": 7721 }, { "epoch": 0.98, "grad_norm": 0.7681578901605628, "learning_rate": 5.373601482119381e-06, "loss": 0.5762, "step": 7722 }, { "epoch": 0.98, "grad_norm": 0.5935637803021613, "learning_rate": 5.3725727717289375e-06, "loss": 0.4968, "step": 7723 }, { "epoch": 0.98, "grad_norm": 0.7929195390306502, "learning_rate": 5.371544045479264e-06, "loss": 0.6021, "step": 7724 }, { "epoch": 0.98, "grad_norm": 0.7285118946070908, "learning_rate": 5.370515303414146e-06, "loss": 0.6269, "step": 7725 }, { "epoch": 0.98, "grad_norm": 0.8765799796380599, "learning_rate": 5.369486545577377e-06, "loss": 0.6, "step": 7726 }, { "epoch": 0.98, "grad_norm": 1.0845970718274653, "learning_rate": 5.368457772012745e-06, "loss": 0.5602, "step": 7727 }, { "epoch": 0.98, "grad_norm": 0.7734004673846682, "learning_rate": 5.367428982764045e-06, "loss": 0.6608, "step": 7728 }, { "epoch": 0.98, "grad_norm": 0.6317609717150813, "learning_rate": 5.366400177875064e-06, "loss": 0.5663, "step": 7729 }, { "epoch": 0.98, "grad_norm": 0.5557156098948434, "learning_rate": 5.3653713573896e-06, "loss": 0.5377, "step": 7730 }, { "epoch": 0.98, "grad_norm": 0.6271265655965282, "learning_rate": 5.364342521351446e-06, "loss": 0.5328, "step": 7731 }, { "epoch": 0.99, "grad_norm": 0.6090695624168665, "learning_rate": 5.3633136698043945e-06, "loss": 0.5201, "step": 7732 }, { "epoch": 0.99, "grad_norm": 0.6289026450117043, "learning_rate": 5.36228480279224e-06, "loss": 0.4897, "step": 7733 }, { "epoch": 0.99, "grad_norm": 0.756977882233415, "learning_rate": 5.361255920358781e-06, "loss": 0.6615, "step": 7734 }, { "epoch": 0.99, "grad_norm": 0.6408538710284514, "learning_rate": 5.360227022547809e-06, "loss": 0.5532, "step": 7735 }, { "epoch": 0.99, "grad_norm": 0.6004730240822013, "learning_rate": 5.359198109403127e-06, "loss": 0.5081, "step": 7736 }, { "epoch": 0.99, "grad_norm": 0.5559908687298513, "learning_rate": 5.358169180968527e-06, "loss": 0.4976, "step": 7737 }, { "epoch": 0.99, "grad_norm": 0.6404008594146845, "learning_rate": 5.357140237287811e-06, "loss": 0.5139, "step": 7738 }, { "epoch": 0.99, "grad_norm": 0.7470665460472893, "learning_rate": 5.356111278404777e-06, "loss": 0.626, "step": 7739 }, { "epoch": 0.99, "grad_norm": 0.5834475636775669, "learning_rate": 5.355082304363221e-06, "loss": 0.4851, "step": 7740 }, { "epoch": 0.99, "grad_norm": 0.5896010140170032, "learning_rate": 5.354053315206947e-06, "loss": 0.4941, "step": 7741 }, { "epoch": 0.99, "grad_norm": 1.0499949451309503, "learning_rate": 5.353024310979757e-06, "loss": 0.6522, "step": 7742 }, { "epoch": 0.99, "grad_norm": 0.5054924310076513, "learning_rate": 5.351995291725448e-06, "loss": 0.4554, "step": 7743 }, { "epoch": 0.99, "grad_norm": 0.6677973829308487, "learning_rate": 5.350966257487825e-06, "loss": 0.5491, "step": 7744 }, { "epoch": 0.99, "grad_norm": 0.7075294731007261, "learning_rate": 5.34993720831069e-06, "loss": 0.5112, "step": 7745 }, { "epoch": 0.99, "grad_norm": 0.9745288610078423, "learning_rate": 5.348908144237846e-06, "loss": 0.6473, "step": 7746 }, { "epoch": 0.99, "grad_norm": 0.5622815945261704, "learning_rate": 5.347879065313096e-06, "loss": 0.4875, "step": 7747 }, { "epoch": 0.99, "grad_norm": 0.6692257317838569, "learning_rate": 5.346849971580248e-06, "loss": 0.5901, "step": 7748 }, { "epoch": 0.99, "grad_norm": 0.6786583804214478, "learning_rate": 5.345820863083105e-06, "loss": 0.5365, "step": 7749 }, { "epoch": 0.99, "grad_norm": 0.6105684261085144, "learning_rate": 5.344791739865471e-06, "loss": 0.5546, "step": 7750 }, { "epoch": 0.99, "grad_norm": 0.8076388076080914, "learning_rate": 5.343762601971156e-06, "loss": 0.6665, "step": 7751 }, { "epoch": 0.99, "grad_norm": 0.8570977591880408, "learning_rate": 5.342733449443965e-06, "loss": 0.6219, "step": 7752 }, { "epoch": 0.99, "grad_norm": 0.6370562571564155, "learning_rate": 5.341704282327705e-06, "loss": 0.5322, "step": 7753 }, { "epoch": 0.99, "grad_norm": 0.6260290500439879, "learning_rate": 5.340675100666188e-06, "loss": 0.5135, "step": 7754 }, { "epoch": 0.99, "grad_norm": 0.5904676112175424, "learning_rate": 5.33964590450322e-06, "loss": 0.4997, "step": 7755 }, { "epoch": 0.99, "grad_norm": 0.7451974252699439, "learning_rate": 5.338616693882611e-06, "loss": 0.5632, "step": 7756 }, { "epoch": 0.99, "grad_norm": 0.7192540204047387, "learning_rate": 5.337587468848171e-06, "loss": 0.5441, "step": 7757 }, { "epoch": 0.99, "grad_norm": 0.622893404555686, "learning_rate": 5.336558229443712e-06, "loss": 0.547, "step": 7758 }, { "epoch": 0.99, "grad_norm": 0.6279810475129116, "learning_rate": 5.335528975713045e-06, "loss": 0.4898, "step": 7759 }, { "epoch": 0.99, "grad_norm": 1.7152529339530527, "learning_rate": 5.33449970769998e-06, "loss": 0.5411, "step": 7760 }, { "epoch": 0.99, "grad_norm": 0.7052606351232354, "learning_rate": 5.333470425448332e-06, "loss": 0.603, "step": 7761 }, { "epoch": 0.99, "grad_norm": 0.5823979446585354, "learning_rate": 5.332441129001914e-06, "loss": 0.5626, "step": 7762 }, { "epoch": 0.99, "grad_norm": 0.7608170523306291, "learning_rate": 5.331411818404539e-06, "loss": 0.5586, "step": 7763 }, { "epoch": 0.99, "grad_norm": 0.6128208302282596, "learning_rate": 5.3303824937000225e-06, "loss": 0.5629, "step": 7764 }, { "epoch": 0.99, "grad_norm": 0.7164929974776274, "learning_rate": 5.329353154932179e-06, "loss": 0.5765, "step": 7765 }, { "epoch": 0.99, "grad_norm": 0.7713169793183182, "learning_rate": 5.328323802144822e-06, "loss": 0.5695, "step": 7766 }, { "epoch": 0.99, "grad_norm": 0.9265304642052163, "learning_rate": 5.327294435381772e-06, "loss": 0.6818, "step": 7767 }, { "epoch": 0.99, "grad_norm": 0.825508822067408, "learning_rate": 5.326265054686844e-06, "loss": 0.5846, "step": 7768 }, { "epoch": 0.99, "grad_norm": 0.628665724668007, "learning_rate": 5.325235660103856e-06, "loss": 0.5755, "step": 7769 }, { "epoch": 0.99, "grad_norm": 0.66366246560793, "learning_rate": 5.324206251676623e-06, "loss": 0.5227, "step": 7770 }, { "epoch": 0.99, "grad_norm": 0.6761650849348263, "learning_rate": 5.323176829448967e-06, "loss": 0.5173, "step": 7771 }, { "epoch": 0.99, "grad_norm": 0.7199382572637044, "learning_rate": 5.322147393464706e-06, "loss": 0.5702, "step": 7772 }, { "epoch": 0.99, "grad_norm": 0.5887790917193235, "learning_rate": 5.321117943767661e-06, "loss": 0.5038, "step": 7773 }, { "epoch": 0.99, "grad_norm": 4.031051151846374, "learning_rate": 5.320088480401649e-06, "loss": 0.6142, "step": 7774 }, { "epoch": 0.99, "grad_norm": 0.7283028926059821, "learning_rate": 5.319059003410496e-06, "loss": 0.5782, "step": 7775 }, { "epoch": 0.99, "grad_norm": 0.7614281547964779, "learning_rate": 5.318029512838018e-06, "loss": 0.6233, "step": 7776 }, { "epoch": 0.99, "grad_norm": 0.7501909888112255, "learning_rate": 5.317000008728042e-06, "loss": 0.5937, "step": 7777 }, { "epoch": 0.99, "grad_norm": 0.737652573312114, "learning_rate": 5.315970491124387e-06, "loss": 0.5731, "step": 7778 }, { "epoch": 0.99, "grad_norm": 0.7259415196763576, "learning_rate": 5.314940960070879e-06, "loss": 0.5387, "step": 7779 }, { "epoch": 0.99, "grad_norm": 0.5689656767728895, "learning_rate": 5.313911415611341e-06, "loss": 0.5163, "step": 7780 }, { "epoch": 0.99, "grad_norm": 1.2708990744823239, "learning_rate": 5.312881857789596e-06, "loss": 0.6402, "step": 7781 }, { "epoch": 0.99, "grad_norm": 0.6190638294954918, "learning_rate": 5.31185228664947e-06, "loss": 0.5644, "step": 7782 }, { "epoch": 0.99, "grad_norm": 0.6110976970031593, "learning_rate": 5.31082270223479e-06, "loss": 0.5053, "step": 7783 }, { "epoch": 0.99, "grad_norm": 0.5809708065534906, "learning_rate": 5.309793104589379e-06, "loss": 0.4796, "step": 7784 }, { "epoch": 0.99, "grad_norm": 0.5963783041967836, "learning_rate": 5.308763493757067e-06, "loss": 0.517, "step": 7785 }, { "epoch": 0.99, "grad_norm": 0.7553283451643742, "learning_rate": 5.30773386978168e-06, "loss": 0.5929, "step": 7786 }, { "epoch": 0.99, "grad_norm": 0.812690534696544, "learning_rate": 5.306704232707045e-06, "loss": 0.6016, "step": 7787 }, { "epoch": 0.99, "grad_norm": 0.6015041572317619, "learning_rate": 5.305674582576991e-06, "loss": 0.4999, "step": 7788 }, { "epoch": 0.99, "grad_norm": 1.089798022059888, "learning_rate": 5.304644919435347e-06, "loss": 0.6408, "step": 7789 }, { "epoch": 0.99, "grad_norm": 0.8056609174808825, "learning_rate": 5.303615243325942e-06, "loss": 0.5716, "step": 7790 }, { "epoch": 0.99, "grad_norm": 0.8010464937966264, "learning_rate": 5.302585554292606e-06, "loss": 0.5956, "step": 7791 }, { "epoch": 0.99, "grad_norm": 0.6617620654217385, "learning_rate": 5.30155585237917e-06, "loss": 0.5772, "step": 7792 }, { "epoch": 0.99, "grad_norm": 0.5711171743383584, "learning_rate": 5.300526137629465e-06, "loss": 0.5067, "step": 7793 }, { "epoch": 0.99, "grad_norm": 0.5805484877470243, "learning_rate": 5.2994964100873225e-06, "loss": 0.5578, "step": 7794 }, { "epoch": 0.99, "grad_norm": 0.5776063981112649, "learning_rate": 5.2984666697965755e-06, "loss": 0.4718, "step": 7795 }, { "epoch": 0.99, "grad_norm": 0.7996069024995108, "learning_rate": 5.297436916801057e-06, "loss": 0.6427, "step": 7796 }, { "epoch": 0.99, "grad_norm": 0.7790259147680327, "learning_rate": 5.296407151144597e-06, "loss": 0.631, "step": 7797 }, { "epoch": 0.99, "grad_norm": 0.6982858058000818, "learning_rate": 5.295377372871033e-06, "loss": 0.5038, "step": 7798 }, { "epoch": 0.99, "grad_norm": 0.6696859042299708, "learning_rate": 5.2943475820241975e-06, "loss": 0.5594, "step": 7799 }, { "epoch": 0.99, "grad_norm": 0.9531840280237015, "learning_rate": 5.293317778647927e-06, "loss": 0.6101, "step": 7800 }, { "epoch": 0.99, "grad_norm": 0.6875684693717123, "learning_rate": 5.292287962786055e-06, "loss": 0.5306, "step": 7801 }, { "epoch": 0.99, "grad_norm": 0.713596805921403, "learning_rate": 5.29125813448242e-06, "loss": 0.5671, "step": 7802 }, { "epoch": 0.99, "grad_norm": 0.5320728376249004, "learning_rate": 5.290228293780855e-06, "loss": 0.4567, "step": 7803 }, { "epoch": 0.99, "grad_norm": 0.774006308745748, "learning_rate": 5.2891984407252e-06, "loss": 0.6245, "step": 7804 }, { "epoch": 0.99, "grad_norm": 0.5712094596877593, "learning_rate": 5.2881685753592915e-06, "loss": 0.4808, "step": 7805 }, { "epoch": 0.99, "grad_norm": 0.6623986780775507, "learning_rate": 5.2871386977269675e-06, "loss": 0.5595, "step": 7806 }, { "epoch": 0.99, "grad_norm": 0.7005396887928569, "learning_rate": 5.286108807872068e-06, "loss": 0.5233, "step": 7807 }, { "epoch": 0.99, "grad_norm": 0.7504345644670651, "learning_rate": 5.28507890583843e-06, "loss": 0.5768, "step": 7808 }, { "epoch": 0.99, "grad_norm": 0.7364170525226253, "learning_rate": 5.284048991669892e-06, "loss": 0.5746, "step": 7809 }, { "epoch": 0.99, "grad_norm": 0.9293607265773179, "learning_rate": 5.283019065410298e-06, "loss": 0.579, "step": 7810 }, { "epoch": 1.0, "grad_norm": 0.8804876851137308, "learning_rate": 5.281989127103486e-06, "loss": 0.6288, "step": 7811 }, { "epoch": 1.0, "grad_norm": 0.5648661112293749, "learning_rate": 5.280959176793299e-06, "loss": 0.5166, "step": 7812 }, { "epoch": 1.0, "grad_norm": 0.641143481452317, "learning_rate": 5.279929214523577e-06, "loss": 0.5385, "step": 7813 }, { "epoch": 1.0, "grad_norm": 0.7492285420082699, "learning_rate": 5.278899240338164e-06, "loss": 0.5846, "step": 7814 }, { "epoch": 1.0, "grad_norm": 1.0233634774609417, "learning_rate": 5.277869254280899e-06, "loss": 0.5459, "step": 7815 }, { "epoch": 1.0, "grad_norm": 0.6057256830890536, "learning_rate": 5.27683925639563e-06, "loss": 0.543, "step": 7816 }, { "epoch": 1.0, "grad_norm": 0.5877385092786994, "learning_rate": 5.275809246726198e-06, "loss": 0.5288, "step": 7817 }, { "epoch": 1.0, "grad_norm": 0.9096373109851044, "learning_rate": 5.2747792253164475e-06, "loss": 0.6386, "step": 7818 }, { "epoch": 1.0, "grad_norm": 0.628624302150182, "learning_rate": 5.273749192210223e-06, "loss": 0.5669, "step": 7819 }, { "epoch": 1.0, "grad_norm": 0.5481796449252005, "learning_rate": 5.272719147451372e-06, "loss": 0.519, "step": 7820 }, { "epoch": 1.0, "grad_norm": 1.0152923342247557, "learning_rate": 5.271689091083737e-06, "loss": 0.6053, "step": 7821 }, { "epoch": 1.0, "grad_norm": 2.0712457774012485, "learning_rate": 5.270659023151164e-06, "loss": 0.614, "step": 7822 }, { "epoch": 1.0, "grad_norm": 0.688658659315264, "learning_rate": 5.269628943697504e-06, "loss": 0.5533, "step": 7823 }, { "epoch": 1.0, "grad_norm": 0.6756675346791414, "learning_rate": 5.2685988527666e-06, "loss": 0.5231, "step": 7824 }, { "epoch": 1.0, "grad_norm": 0.5546344409186675, "learning_rate": 5.267568750402302e-06, "loss": 0.4822, "step": 7825 }, { "epoch": 1.0, "grad_norm": 0.7301359025716571, "learning_rate": 5.266538636648457e-06, "loss": 0.5981, "step": 7826 }, { "epoch": 1.0, "grad_norm": 0.6749830205492546, "learning_rate": 5.265508511548914e-06, "loss": 0.5418, "step": 7827 }, { "epoch": 1.0, "grad_norm": 0.7416448440544625, "learning_rate": 5.26447837514752e-06, "loss": 0.5355, "step": 7828 }, { "epoch": 1.0, "grad_norm": 0.5509875832044191, "learning_rate": 5.263448227488129e-06, "loss": 0.5682, "step": 7829 }, { "epoch": 1.0, "grad_norm": 1.07279499002777, "learning_rate": 5.26241806861459e-06, "loss": 0.5951, "step": 7830 }, { "epoch": 1.0, "grad_norm": 0.5859297692953886, "learning_rate": 5.26138789857075e-06, "loss": 0.5068, "step": 7831 }, { "epoch": 1.0, "grad_norm": 0.7927452168983924, "learning_rate": 5.260357717400464e-06, "loss": 0.6395, "step": 7832 }, { "epoch": 1.0, "grad_norm": 0.6918959233784057, "learning_rate": 5.2593275251475815e-06, "loss": 0.5167, "step": 7833 }, { "epoch": 1.0, "grad_norm": 0.5879944535026977, "learning_rate": 5.258297321855955e-06, "loss": 0.555, "step": 7834 }, { "epoch": 1.0, "grad_norm": 0.7328910998645103, "learning_rate": 5.257267107569437e-06, "loss": 0.5177, "step": 7835 }, { "epoch": 1.0, "grad_norm": 0.6526112273473237, "learning_rate": 5.25623688233188e-06, "loss": 0.4904, "step": 7836 }, { "epoch": 1.0, "grad_norm": 0.780447083648694, "learning_rate": 5.255206646187137e-06, "loss": 0.6327, "step": 7837 }, { "epoch": 1.0, "grad_norm": 0.603639518509909, "learning_rate": 5.254176399179063e-06, "loss": 0.5729, "step": 7838 }, { "epoch": 1.0, "grad_norm": 0.8902777452328379, "learning_rate": 5.253146141351513e-06, "loss": 0.5984, "step": 7839 }, { "epoch": 1.0, "grad_norm": 0.6154693779911932, "learning_rate": 5.252115872748339e-06, "loss": 0.5397, "step": 7840 }, { "epoch": 1.0, "grad_norm": 0.6534417590394038, "learning_rate": 5.2510855934134e-06, "loss": 0.5333, "step": 7841 }, { "epoch": 1.0, "grad_norm": 0.6595574881738436, "learning_rate": 5.2500553033905475e-06, "loss": 0.5312, "step": 7842 }, { "epoch": 1.0, "grad_norm": 0.6746130595493086, "learning_rate": 5.249025002723641e-06, "loss": 0.5419, "step": 7843 }, { "epoch": 1.0, "grad_norm": 0.6270313439856194, "learning_rate": 5.247994691456536e-06, "loss": 0.5279, "step": 7844 }, { "epoch": 1.0, "grad_norm": 0.775804032349742, "learning_rate": 5.24696436963309e-06, "loss": 0.5997, "step": 7845 }, { "epoch": 1.0, "grad_norm": 0.832411540395696, "learning_rate": 5.2459340372971575e-06, "loss": 0.5826, "step": 7846 }, { "epoch": 1.0, "grad_norm": 0.6519801525397707, "learning_rate": 5.244903694492601e-06, "loss": 0.5549, "step": 7847 }, { "epoch": 1.0, "grad_norm": 0.6134851027548354, "learning_rate": 5.243873341263277e-06, "loss": 0.4926, "step": 7848 }, { "epoch": 1.0, "grad_norm": 0.7119932640697326, "learning_rate": 5.242842977653043e-06, "loss": 0.5691, "step": 7849 }, { "epoch": 1.0, "grad_norm": 1.2222442909906404, "learning_rate": 5.24181260370576e-06, "loss": 0.5123, "step": 7850 }, { "epoch": 1.0, "grad_norm": 0.5758065634254151, "learning_rate": 5.240782219465288e-06, "loss": 0.4688, "step": 7851 }, { "epoch": 1.0, "grad_norm": 0.6321561441497726, "learning_rate": 5.239751824975486e-06, "loss": 0.462, "step": 7852 }, { "epoch": 1.0, "grad_norm": 0.558115720826898, "learning_rate": 5.238721420280214e-06, "loss": 0.4966, "step": 7853 }, { "epoch": 1.0, "grad_norm": 0.5588446667874375, "learning_rate": 5.2376910054233345e-06, "loss": 0.4864, "step": 7854 }, { "epoch": 1.0, "grad_norm": 0.5867568047909654, "learning_rate": 5.236660580448708e-06, "loss": 0.4867, "step": 7855 }, { "epoch": 1.0, "grad_norm": 0.5976199452892107, "learning_rate": 5.2356301454001975e-06, "loss": 0.4574, "step": 7856 }, { "epoch": 1.0, "grad_norm": 0.57499500957606, "learning_rate": 5.234599700321665e-06, "loss": 0.4515, "step": 7857 }, { "epoch": 1.0, "grad_norm": 0.6237006656153087, "learning_rate": 5.233569245256972e-06, "loss": 0.4501, "step": 7858 }, { "epoch": 1.0, "grad_norm": 0.6916216831897695, "learning_rate": 5.232538780249983e-06, "loss": 0.4787, "step": 7859 }, { "epoch": 1.0, "grad_norm": 0.6085300666010587, "learning_rate": 5.23150830534456e-06, "loss": 0.4869, "step": 7860 }, { "epoch": 1.0, "grad_norm": 0.8276365431073447, "learning_rate": 5.230477820584571e-06, "loss": 0.5197, "step": 7861 }, { "epoch": 1.0, "grad_norm": 0.5487516361202578, "learning_rate": 5.2294473260138755e-06, "loss": 0.4347, "step": 7862 }, { "epoch": 1.0, "grad_norm": 0.5517042728327083, "learning_rate": 5.228416821676342e-06, "loss": 0.4777, "step": 7863 }, { "epoch": 1.0, "grad_norm": 0.6183710365216042, "learning_rate": 5.227386307615834e-06, "loss": 0.5175, "step": 7864 }, { "epoch": 1.0, "grad_norm": 0.6792680583163542, "learning_rate": 5.226355783876216e-06, "loss": 0.5101, "step": 7865 }, { "epoch": 1.0, "grad_norm": 0.528404102546661, "learning_rate": 5.225325250501356e-06, "loss": 0.4194, "step": 7866 }, { "epoch": 1.0, "grad_norm": 0.5584978234264941, "learning_rate": 5.224294707535121e-06, "loss": 0.4969, "step": 7867 }, { "epoch": 1.0, "grad_norm": 0.6369438303944066, "learning_rate": 5.223264155021377e-06, "loss": 0.4655, "step": 7868 }, { "epoch": 1.0, "grad_norm": 0.5845717616783592, "learning_rate": 5.222233593003991e-06, "loss": 0.4757, "step": 7869 }, { "epoch": 1.0, "grad_norm": 0.5806164828941629, "learning_rate": 5.2212030215268316e-06, "loss": 0.4334, "step": 7870 }, { "epoch": 1.0, "grad_norm": 0.6175428783073746, "learning_rate": 5.220172440633765e-06, "loss": 0.4572, "step": 7871 }, { "epoch": 1.0, "grad_norm": 0.6714088486505605, "learning_rate": 5.219141850368663e-06, "loss": 0.5005, "step": 7872 }, { "epoch": 1.0, "grad_norm": 0.6064130224923959, "learning_rate": 5.218111250775392e-06, "loss": 0.4589, "step": 7873 }, { "epoch": 1.0, "grad_norm": 0.5881835032905863, "learning_rate": 5.217080641897822e-06, "loss": 0.4547, "step": 7874 }, { "epoch": 1.0, "grad_norm": 0.5449062120445409, "learning_rate": 5.216050023779823e-06, "loss": 0.4079, "step": 7875 }, { "epoch": 1.0, "grad_norm": 0.5739006332696504, "learning_rate": 5.215019396465265e-06, "loss": 0.466, "step": 7876 }, { "epoch": 1.0, "grad_norm": 0.7404029981314925, "learning_rate": 5.2139887599980165e-06, "loss": 0.5023, "step": 7877 }, { "epoch": 1.0, "grad_norm": 0.5806108049422828, "learning_rate": 5.212958114421952e-06, "loss": 0.4803, "step": 7878 }, { "epoch": 1.0, "grad_norm": 0.6495553722544288, "learning_rate": 5.211927459780941e-06, "loss": 0.4933, "step": 7879 }, { "epoch": 1.0, "grad_norm": 0.7496193297608316, "learning_rate": 5.210896796118856e-06, "loss": 0.5363, "step": 7880 }, { "epoch": 1.0, "grad_norm": 0.7422031504574214, "learning_rate": 5.209866123479568e-06, "loss": 0.5495, "step": 7881 }, { "epoch": 1.0, "grad_norm": 1.2325166974738717, "learning_rate": 5.208835441906949e-06, "loss": 0.5266, "step": 7882 }, { "epoch": 1.0, "grad_norm": 0.5955422006311042, "learning_rate": 5.207804751444873e-06, "loss": 0.4479, "step": 7883 }, { "epoch": 1.0, "grad_norm": 0.6866498561064781, "learning_rate": 5.206774052137211e-06, "loss": 0.5006, "step": 7884 }, { "epoch": 1.0, "grad_norm": 0.7055277263865362, "learning_rate": 5.205743344027841e-06, "loss": 0.5079, "step": 7885 }, { "epoch": 1.0, "grad_norm": 0.7765973950804936, "learning_rate": 5.204712627160633e-06, "loss": 0.5159, "step": 7886 }, { "epoch": 1.0, "grad_norm": 0.8552237561245664, "learning_rate": 5.203681901579463e-06, "loss": 0.5298, "step": 7887 }, { "epoch": 1.0, "grad_norm": 0.7650165748175736, "learning_rate": 5.2026511673282055e-06, "loss": 0.5489, "step": 7888 }, { "epoch": 1.01, "grad_norm": 0.7784502397602112, "learning_rate": 5.201620424450735e-06, "loss": 0.5351, "step": 7889 }, { "epoch": 1.01, "grad_norm": 0.717783693811607, "learning_rate": 5.2005896729909265e-06, "loss": 0.5285, "step": 7890 }, { "epoch": 1.01, "grad_norm": 0.6732178759992189, "learning_rate": 5.1995589129926584e-06, "loss": 0.5296, "step": 7891 }, { "epoch": 1.01, "grad_norm": 0.7143077838780056, "learning_rate": 5.1985281444998035e-06, "loss": 0.5036, "step": 7892 }, { "epoch": 1.01, "grad_norm": 0.5784084365810707, "learning_rate": 5.1974973675562415e-06, "loss": 0.4775, "step": 7893 }, { "epoch": 1.01, "grad_norm": 0.7220102673155449, "learning_rate": 5.196466582205847e-06, "loss": 0.5045, "step": 7894 }, { "epoch": 1.01, "grad_norm": 0.6974815958785543, "learning_rate": 5.195435788492498e-06, "loss": 0.5339, "step": 7895 }, { "epoch": 1.01, "grad_norm": 0.661259867635626, "learning_rate": 5.194404986460072e-06, "loss": 0.5089, "step": 7896 }, { "epoch": 1.01, "grad_norm": 0.6067626326698959, "learning_rate": 5.193374176152447e-06, "loss": 0.4607, "step": 7897 }, { "epoch": 1.01, "grad_norm": 0.5788263325410276, "learning_rate": 5.192343357613501e-06, "loss": 0.4585, "step": 7898 }, { "epoch": 1.01, "grad_norm": 0.5907338388077803, "learning_rate": 5.191312530887111e-06, "loss": 0.4082, "step": 7899 }, { "epoch": 1.01, "grad_norm": 0.6836441289086129, "learning_rate": 5.190281696017161e-06, "loss": 0.4782, "step": 7900 }, { "epoch": 1.01, "grad_norm": 0.7343299955143782, "learning_rate": 5.189250853047526e-06, "loss": 0.4759, "step": 7901 }, { "epoch": 1.01, "grad_norm": 0.7058577594577498, "learning_rate": 5.1882200020220865e-06, "loss": 0.5097, "step": 7902 }, { "epoch": 1.01, "grad_norm": 0.5836534081409047, "learning_rate": 5.187189142984724e-06, "loss": 0.4643, "step": 7903 }, { "epoch": 1.01, "grad_norm": 0.5992547612054283, "learning_rate": 5.186158275979317e-06, "loss": 0.5217, "step": 7904 }, { "epoch": 1.01, "grad_norm": 0.7516662410073572, "learning_rate": 5.185127401049747e-06, "loss": 0.5022, "step": 7905 }, { "epoch": 1.01, "grad_norm": 0.6707635254400446, "learning_rate": 5.184096518239896e-06, "loss": 0.5076, "step": 7906 }, { "epoch": 1.01, "grad_norm": 0.9783763335461778, "learning_rate": 5.183065627593644e-06, "loss": 0.5246, "step": 7907 }, { "epoch": 1.01, "grad_norm": 0.6263104276982511, "learning_rate": 5.182034729154873e-06, "loss": 0.4564, "step": 7908 }, { "epoch": 1.01, "grad_norm": 0.8231500856341674, "learning_rate": 5.1810038229674655e-06, "loss": 0.499, "step": 7909 }, { "epoch": 1.01, "grad_norm": 0.6786524373082691, "learning_rate": 5.179972909075304e-06, "loss": 0.5495, "step": 7910 }, { "epoch": 1.01, "grad_norm": 0.8591074376454022, "learning_rate": 5.17894198752227e-06, "loss": 0.6068, "step": 7911 }, { "epoch": 1.01, "grad_norm": 0.5978885680276532, "learning_rate": 5.177911058352249e-06, "loss": 0.4423, "step": 7912 }, { "epoch": 1.01, "grad_norm": 0.5953491452164323, "learning_rate": 5.1768801216091214e-06, "loss": 0.4332, "step": 7913 }, { "epoch": 1.01, "grad_norm": 0.6073291604471678, "learning_rate": 5.175849177336772e-06, "loss": 0.445, "step": 7914 }, { "epoch": 1.01, "grad_norm": 0.5661976490389823, "learning_rate": 5.1748182255790854e-06, "loss": 0.4511, "step": 7915 }, { "epoch": 1.01, "grad_norm": 0.7594191405537379, "learning_rate": 5.173787266379946e-06, "loss": 0.4677, "step": 7916 }, { "epoch": 1.01, "grad_norm": 0.5494028434496976, "learning_rate": 5.1727562997832385e-06, "loss": 0.4489, "step": 7917 }, { "epoch": 1.01, "grad_norm": 0.5562907041862178, "learning_rate": 5.171725325832846e-06, "loss": 0.4004, "step": 7918 }, { "epoch": 1.01, "grad_norm": 0.6547373523823373, "learning_rate": 5.170694344572656e-06, "loss": 0.5031, "step": 7919 }, { "epoch": 1.01, "grad_norm": 0.716585560276361, "learning_rate": 5.169663356046554e-06, "loss": 0.5387, "step": 7920 }, { "epoch": 1.01, "grad_norm": 0.6218172132755906, "learning_rate": 5.168632360298422e-06, "loss": 0.4946, "step": 7921 }, { "epoch": 1.01, "grad_norm": 0.6218170480458325, "learning_rate": 5.167601357372152e-06, "loss": 0.4442, "step": 7922 }, { "epoch": 1.01, "grad_norm": 0.6189001511113672, "learning_rate": 5.1665703473116276e-06, "loss": 0.4465, "step": 7923 }, { "epoch": 1.01, "grad_norm": 0.5814782125395176, "learning_rate": 5.165539330160736e-06, "loss": 0.4239, "step": 7924 }, { "epoch": 1.01, "grad_norm": 0.6314575397965841, "learning_rate": 5.164508305963363e-06, "loss": 0.4803, "step": 7925 }, { "epoch": 1.01, "grad_norm": 0.5776419503010333, "learning_rate": 5.163477274763398e-06, "loss": 0.4772, "step": 7926 }, { "epoch": 1.01, "grad_norm": 0.6093649946999745, "learning_rate": 5.162446236604727e-06, "loss": 0.4915, "step": 7927 }, { "epoch": 1.01, "grad_norm": 0.6529769421281877, "learning_rate": 5.161415191531238e-06, "loss": 0.48, "step": 7928 }, { "epoch": 1.01, "grad_norm": 0.6524811141356149, "learning_rate": 5.160384139586823e-06, "loss": 0.4926, "step": 7929 }, { "epoch": 1.01, "grad_norm": 0.7418814915815949, "learning_rate": 5.159353080815366e-06, "loss": 0.548, "step": 7930 }, { "epoch": 1.01, "grad_norm": 0.880659114316506, "learning_rate": 5.1583220152607576e-06, "loss": 0.4367, "step": 7931 }, { "epoch": 1.01, "grad_norm": 0.7589214795491664, "learning_rate": 5.157290942966887e-06, "loss": 0.4815, "step": 7932 }, { "epoch": 1.01, "grad_norm": 0.7780801338528642, "learning_rate": 5.156259863977642e-06, "loss": 0.5576, "step": 7933 }, { "epoch": 1.01, "grad_norm": 0.6991793599579742, "learning_rate": 5.155228778336916e-06, "loss": 0.475, "step": 7934 }, { "epoch": 1.01, "grad_norm": 0.6366171494469306, "learning_rate": 5.154197686088597e-06, "loss": 0.483, "step": 7935 }, { "epoch": 1.01, "grad_norm": 0.8350875705969815, "learning_rate": 5.153166587276575e-06, "loss": 0.5303, "step": 7936 }, { "epoch": 1.01, "grad_norm": 0.6537874640664983, "learning_rate": 5.15213548194474e-06, "loss": 0.4723, "step": 7937 }, { "epoch": 1.01, "grad_norm": 0.7102780936488094, "learning_rate": 5.151104370136985e-06, "loss": 0.5038, "step": 7938 }, { "epoch": 1.01, "grad_norm": 0.6163822227108454, "learning_rate": 5.150073251897197e-06, "loss": 0.4586, "step": 7939 }, { "epoch": 1.01, "grad_norm": 0.8634455972841117, "learning_rate": 5.149042127269273e-06, "loss": 0.4737, "step": 7940 }, { "epoch": 1.01, "grad_norm": 0.6060153083147701, "learning_rate": 5.148010996297101e-06, "loss": 0.4322, "step": 7941 }, { "epoch": 1.01, "grad_norm": 0.6140990846526703, "learning_rate": 5.146979859024575e-06, "loss": 0.4033, "step": 7942 }, { "epoch": 1.01, "grad_norm": 0.6012467827600491, "learning_rate": 5.1459487154955845e-06, "loss": 0.4946, "step": 7943 }, { "epoch": 1.01, "grad_norm": 0.636700584098543, "learning_rate": 5.144917565754024e-06, "loss": 0.4964, "step": 7944 }, { "epoch": 1.01, "grad_norm": 0.7500634117767534, "learning_rate": 5.143886409843787e-06, "loss": 0.493, "step": 7945 }, { "epoch": 1.01, "grad_norm": 0.5440606391210333, "learning_rate": 5.142855247808763e-06, "loss": 0.4259, "step": 7946 }, { "epoch": 1.01, "grad_norm": 0.6452696746661204, "learning_rate": 5.141824079692849e-06, "loss": 0.4542, "step": 7947 }, { "epoch": 1.01, "grad_norm": 0.6072678091343034, "learning_rate": 5.140792905539936e-06, "loss": 0.4354, "step": 7948 }, { "epoch": 1.01, "grad_norm": 0.632203985306397, "learning_rate": 5.1397617253939214e-06, "loss": 0.4692, "step": 7949 }, { "epoch": 1.01, "grad_norm": 0.6658873492179695, "learning_rate": 5.1387305392986955e-06, "loss": 0.5216, "step": 7950 }, { "epoch": 1.01, "grad_norm": 0.7683551914337232, "learning_rate": 5.137699347298153e-06, "loss": 0.5434, "step": 7951 }, { "epoch": 1.01, "grad_norm": 0.5920245185048284, "learning_rate": 5.136668149436189e-06, "loss": 0.4606, "step": 7952 }, { "epoch": 1.01, "grad_norm": 0.7175111090016661, "learning_rate": 5.1356369457567e-06, "loss": 0.521, "step": 7953 }, { "epoch": 1.01, "grad_norm": 0.5766744624633092, "learning_rate": 5.1346057363035796e-06, "loss": 0.4558, "step": 7954 }, { "epoch": 1.01, "grad_norm": 0.6628743339456539, "learning_rate": 5.133574521120723e-06, "loss": 0.432, "step": 7955 }, { "epoch": 1.01, "grad_norm": 0.6042426929200225, "learning_rate": 5.132543300252026e-06, "loss": 0.4063, "step": 7956 }, { "epoch": 1.01, "grad_norm": 0.6178481322734461, "learning_rate": 5.131512073741383e-06, "loss": 0.4723, "step": 7957 }, { "epoch": 1.01, "grad_norm": 0.7220445758293353, "learning_rate": 5.1304808416326935e-06, "loss": 0.4703, "step": 7958 }, { "epoch": 1.01, "grad_norm": 0.5529690856768076, "learning_rate": 5.12944960396985e-06, "loss": 0.4275, "step": 7959 }, { "epoch": 1.01, "grad_norm": 0.5661459335550141, "learning_rate": 5.128418360796751e-06, "loss": 0.4271, "step": 7960 }, { "epoch": 1.01, "grad_norm": 0.7664397795751051, "learning_rate": 5.1273871121572916e-06, "loss": 0.5452, "step": 7961 }, { "epoch": 1.01, "grad_norm": 0.7308487637051548, "learning_rate": 5.126355858095371e-06, "loss": 0.5179, "step": 7962 }, { "epoch": 1.01, "grad_norm": 0.8226834474919791, "learning_rate": 5.125324598654885e-06, "loss": 0.5422, "step": 7963 }, { "epoch": 1.01, "grad_norm": 0.5945051764222167, "learning_rate": 5.1242933338797315e-06, "loss": 0.4644, "step": 7964 }, { "epoch": 1.01, "grad_norm": 0.6473159898242234, "learning_rate": 5.123262063813809e-06, "loss": 0.469, "step": 7965 }, { "epoch": 1.01, "grad_norm": 0.7744216021454134, "learning_rate": 5.1222307885010125e-06, "loss": 0.5292, "step": 7966 }, { "epoch": 1.01, "grad_norm": 0.644001575144423, "learning_rate": 5.121199507985243e-06, "loss": 0.5379, "step": 7967 }, { "epoch": 1.02, "grad_norm": 0.8232099811801318, "learning_rate": 5.120168222310398e-06, "loss": 0.4931, "step": 7968 }, { "epoch": 1.02, "grad_norm": 0.6478201516002157, "learning_rate": 5.119136931520374e-06, "loss": 0.4849, "step": 7969 }, { "epoch": 1.02, "grad_norm": 0.6098348884653333, "learning_rate": 5.118105635659072e-06, "loss": 0.4889, "step": 7970 }, { "epoch": 1.02, "grad_norm": 0.7094914566743494, "learning_rate": 5.1170743347703925e-06, "loss": 0.5458, "step": 7971 }, { "epoch": 1.02, "grad_norm": 0.5057615961880692, "learning_rate": 5.116043028898231e-06, "loss": 0.4074, "step": 7972 }, { "epoch": 1.02, "grad_norm": 0.6252288079856869, "learning_rate": 5.1150117180864885e-06, "loss": 0.4581, "step": 7973 }, { "epoch": 1.02, "grad_norm": 0.7702721521291179, "learning_rate": 5.113980402379066e-06, "loss": 0.4721, "step": 7974 }, { "epoch": 1.02, "grad_norm": 0.6409602156185231, "learning_rate": 5.112949081819861e-06, "loss": 0.4741, "step": 7975 }, { "epoch": 1.02, "grad_norm": 0.7036416261389531, "learning_rate": 5.1119177564527744e-06, "loss": 0.5164, "step": 7976 }, { "epoch": 1.02, "grad_norm": 0.6132219639646236, "learning_rate": 5.110886426321706e-06, "loss": 0.48, "step": 7977 }, { "epoch": 1.02, "grad_norm": 0.6490586193228038, "learning_rate": 5.109855091470558e-06, "loss": 0.4333, "step": 7978 }, { "epoch": 1.02, "grad_norm": 0.6223689032668894, "learning_rate": 5.108823751943229e-06, "loss": 0.4675, "step": 7979 }, { "epoch": 1.02, "grad_norm": 0.7359589583358829, "learning_rate": 5.107792407783621e-06, "loss": 0.5063, "step": 7980 }, { "epoch": 1.02, "grad_norm": 0.7285229381058957, "learning_rate": 5.106761059035635e-06, "loss": 0.4765, "step": 7981 }, { "epoch": 1.02, "grad_norm": 0.8333369552148613, "learning_rate": 5.105729705743172e-06, "loss": 0.5086, "step": 7982 }, { "epoch": 1.02, "grad_norm": 0.6623259111464884, "learning_rate": 5.104698347950133e-06, "loss": 0.471, "step": 7983 }, { "epoch": 1.02, "grad_norm": 0.6391896574243159, "learning_rate": 5.103666985700419e-06, "loss": 0.3967, "step": 7984 }, { "epoch": 1.02, "grad_norm": 0.5695048795154143, "learning_rate": 5.102635619037933e-06, "loss": 0.4193, "step": 7985 }, { "epoch": 1.02, "grad_norm": 0.6258258704388905, "learning_rate": 5.101604248006578e-06, "loss": 0.4494, "step": 7986 }, { "epoch": 1.02, "grad_norm": 0.5364360820448856, "learning_rate": 5.100572872650253e-06, "loss": 0.4197, "step": 7987 }, { "epoch": 1.02, "grad_norm": 0.5681300376540526, "learning_rate": 5.099541493012864e-06, "loss": 0.4309, "step": 7988 }, { "epoch": 1.02, "grad_norm": 0.6438331062224097, "learning_rate": 5.098510109138311e-06, "loss": 0.4402, "step": 7989 }, { "epoch": 1.02, "grad_norm": 0.7032718223863288, "learning_rate": 5.097478721070497e-06, "loss": 0.442, "step": 7990 }, { "epoch": 1.02, "grad_norm": 2.2396032828535386, "learning_rate": 5.096447328853325e-06, "loss": 0.5068, "step": 7991 }, { "epoch": 1.02, "grad_norm": 0.6757881311741756, "learning_rate": 5.095415932530699e-06, "loss": 0.4868, "step": 7992 }, { "epoch": 1.02, "grad_norm": 0.6961110881329763, "learning_rate": 5.094384532146522e-06, "loss": 0.5216, "step": 7993 }, { "epoch": 1.02, "grad_norm": 0.7916969372080543, "learning_rate": 5.093353127744698e-06, "loss": 0.4935, "step": 7994 }, { "epoch": 1.02, "grad_norm": 0.7472611838981202, "learning_rate": 5.092321719369127e-06, "loss": 0.4576, "step": 7995 }, { "epoch": 1.02, "grad_norm": 0.7628731599237817, "learning_rate": 5.091290307063718e-06, "loss": 0.5065, "step": 7996 }, { "epoch": 1.02, "grad_norm": 0.5843069535493849, "learning_rate": 5.09025889087237e-06, "loss": 0.4872, "step": 7997 }, { "epoch": 1.02, "grad_norm": 0.8597133104545779, "learning_rate": 5.0892274708389915e-06, "loss": 0.5905, "step": 7998 }, { "epoch": 1.02, "grad_norm": 0.8233981790207843, "learning_rate": 5.088196047007484e-06, "loss": 0.5543, "step": 7999 }, { "epoch": 1.02, "grad_norm": 0.8092275421862285, "learning_rate": 5.087164619421753e-06, "loss": 0.5465, "step": 8000 }, { "epoch": 1.02, "grad_norm": 1.1104018329533105, "learning_rate": 5.0861331881257005e-06, "loss": 0.5639, "step": 8001 }, { "epoch": 1.02, "grad_norm": 1.7982239717209036, "learning_rate": 5.085101753163235e-06, "loss": 0.5469, "step": 8002 }, { "epoch": 1.02, "grad_norm": 0.7441514512711345, "learning_rate": 5.084070314578261e-06, "loss": 0.5199, "step": 8003 }, { "epoch": 1.02, "grad_norm": 0.63587168828236, "learning_rate": 5.083038872414681e-06, "loss": 0.4655, "step": 8004 }, { "epoch": 1.02, "grad_norm": 0.505826891907447, "learning_rate": 5.082007426716402e-06, "loss": 0.4379, "step": 8005 }, { "epoch": 1.02, "grad_norm": 0.5715545494586659, "learning_rate": 5.080975977527329e-06, "loss": 0.4634, "step": 8006 }, { "epoch": 1.02, "grad_norm": 0.6008932968992711, "learning_rate": 5.079944524891367e-06, "loss": 0.4287, "step": 8007 }, { "epoch": 1.02, "grad_norm": 0.5582738378860898, "learning_rate": 5.078913068852421e-06, "loss": 0.4634, "step": 8008 }, { "epoch": 1.02, "grad_norm": 0.6942054465762226, "learning_rate": 5.077881609454399e-06, "loss": 0.4845, "step": 8009 }, { "epoch": 1.02, "grad_norm": 6.845694199068934, "learning_rate": 5.076850146741207e-06, "loss": 0.5073, "step": 8010 }, { "epoch": 1.02, "grad_norm": 0.6477070260379001, "learning_rate": 5.075818680756749e-06, "loss": 0.4626, "step": 8011 }, { "epoch": 1.02, "grad_norm": 0.6164803333648873, "learning_rate": 5.074787211544931e-06, "loss": 0.4639, "step": 8012 }, { "epoch": 1.02, "grad_norm": 0.6111837475395059, "learning_rate": 5.0737557391496615e-06, "loss": 0.4568, "step": 8013 }, { "epoch": 1.02, "grad_norm": 0.6199208845182491, "learning_rate": 5.0727242636148445e-06, "loss": 0.5111, "step": 8014 }, { "epoch": 1.02, "grad_norm": 0.6862575354801447, "learning_rate": 5.071692784984389e-06, "loss": 0.4912, "step": 8015 }, { "epoch": 1.02, "grad_norm": 0.9000295164859495, "learning_rate": 5.070661303302201e-06, "loss": 0.4288, "step": 8016 }, { "epoch": 1.02, "grad_norm": 0.8527616750432485, "learning_rate": 5.069629818612186e-06, "loss": 0.5453, "step": 8017 }, { "epoch": 1.02, "grad_norm": 0.5989249596671377, "learning_rate": 5.068598330958253e-06, "loss": 0.4164, "step": 8018 }, { "epoch": 1.02, "grad_norm": 0.6319806794869235, "learning_rate": 5.067566840384309e-06, "loss": 0.4262, "step": 8019 }, { "epoch": 1.02, "grad_norm": 0.5789886376819435, "learning_rate": 5.066535346934259e-06, "loss": 0.4989, "step": 8020 }, { "epoch": 1.02, "grad_norm": 0.5957754257489064, "learning_rate": 5.065503850652014e-06, "loss": 0.4031, "step": 8021 }, { "epoch": 1.02, "grad_norm": 0.6239028935529184, "learning_rate": 5.064472351581478e-06, "loss": 0.5024, "step": 8022 }, { "epoch": 1.02, "grad_norm": 0.8934761990814896, "learning_rate": 5.063440849766559e-06, "loss": 0.5414, "step": 8023 }, { "epoch": 1.02, "grad_norm": 0.5852631394463695, "learning_rate": 5.062409345251167e-06, "loss": 0.4494, "step": 8024 }, { "epoch": 1.02, "grad_norm": 0.6537072578740031, "learning_rate": 5.0613778380792075e-06, "loss": 0.433, "step": 8025 }, { "epoch": 1.02, "grad_norm": 0.5423354152096477, "learning_rate": 5.06034632829459e-06, "loss": 0.4414, "step": 8026 }, { "epoch": 1.02, "grad_norm": 0.6675241775017613, "learning_rate": 5.059314815941224e-06, "loss": 0.4474, "step": 8027 }, { "epoch": 1.02, "grad_norm": 1.9803082593937613, "learning_rate": 5.058283301063014e-06, "loss": 0.5153, "step": 8028 }, { "epoch": 1.02, "grad_norm": 0.7927231404575348, "learning_rate": 5.057251783703871e-06, "loss": 0.4663, "step": 8029 }, { "epoch": 1.02, "grad_norm": 0.5923349367851576, "learning_rate": 5.056220263907702e-06, "loss": 0.4421, "step": 8030 }, { "epoch": 1.02, "grad_norm": 0.5537083884691775, "learning_rate": 5.055188741718416e-06, "loss": 0.4337, "step": 8031 }, { "epoch": 1.02, "grad_norm": 0.594545552648897, "learning_rate": 5.054157217179922e-06, "loss": 0.4941, "step": 8032 }, { "epoch": 1.02, "grad_norm": 0.8084866070847276, "learning_rate": 5.053125690336127e-06, "loss": 0.5654, "step": 8033 }, { "epoch": 1.02, "grad_norm": 0.758841269158062, "learning_rate": 5.0520941612309425e-06, "loss": 0.5102, "step": 8034 }, { "epoch": 1.02, "grad_norm": 1.0174381773721153, "learning_rate": 5.051062629908276e-06, "loss": 0.4772, "step": 8035 }, { "epoch": 1.02, "grad_norm": 0.7727389710012816, "learning_rate": 5.050031096412036e-06, "loss": 0.4727, "step": 8036 }, { "epoch": 1.02, "grad_norm": 0.6869542875623863, "learning_rate": 5.048999560786132e-06, "loss": 0.4683, "step": 8037 }, { "epoch": 1.02, "grad_norm": 0.7761629624732608, "learning_rate": 5.047968023074474e-06, "loss": 0.4871, "step": 8038 }, { "epoch": 1.02, "grad_norm": 0.6529765213071616, "learning_rate": 5.046936483320969e-06, "loss": 0.4833, "step": 8039 }, { "epoch": 1.02, "grad_norm": 0.8153465847016584, "learning_rate": 5.045904941569529e-06, "loss": 0.5018, "step": 8040 }, { "epoch": 1.02, "grad_norm": 0.5779988652875337, "learning_rate": 5.044873397864063e-06, "loss": 0.4747, "step": 8041 }, { "epoch": 1.02, "grad_norm": 0.6758674169813761, "learning_rate": 5.0438418522484785e-06, "loss": 0.4542, "step": 8042 }, { "epoch": 1.02, "grad_norm": 0.9415339132924784, "learning_rate": 5.042810304766688e-06, "loss": 0.4939, "step": 8043 }, { "epoch": 1.02, "grad_norm": 0.7408326508652936, "learning_rate": 5.0417787554625984e-06, "loss": 0.5083, "step": 8044 }, { "epoch": 1.02, "grad_norm": 0.5427918811238637, "learning_rate": 5.040747204380121e-06, "loss": 0.4432, "step": 8045 }, { "epoch": 1.03, "grad_norm": 0.6481608688800259, "learning_rate": 5.0397156515631654e-06, "loss": 0.4299, "step": 8046 }, { "epoch": 1.03, "grad_norm": 0.5796860423287302, "learning_rate": 5.038684097055641e-06, "loss": 0.4965, "step": 8047 }, { "epoch": 1.03, "grad_norm": 0.7174915973773783, "learning_rate": 5.0376525409014585e-06, "loss": 0.4193, "step": 8048 }, { "epoch": 1.03, "grad_norm": 0.6954211386399821, "learning_rate": 5.036620983144528e-06, "loss": 0.4754, "step": 8049 }, { "epoch": 1.03, "grad_norm": 0.7084335458577081, "learning_rate": 5.03558942382876e-06, "loss": 0.4233, "step": 8050 }, { "epoch": 1.03, "grad_norm": 0.6916893243234785, "learning_rate": 5.0345578629980605e-06, "loss": 0.4413, "step": 8051 }, { "epoch": 1.03, "grad_norm": 1.0630844949586986, "learning_rate": 5.033526300696346e-06, "loss": 0.526, "step": 8052 }, { "epoch": 1.03, "grad_norm": 1.573586967354869, "learning_rate": 5.032494736967525e-06, "loss": 0.5498, "step": 8053 }, { "epoch": 1.03, "grad_norm": 0.7079517205761564, "learning_rate": 5.031463171855505e-06, "loss": 0.4786, "step": 8054 }, { "epoch": 1.03, "grad_norm": 0.6865691109249946, "learning_rate": 5.030431605404199e-06, "loss": 0.4786, "step": 8055 }, { "epoch": 1.03, "grad_norm": 0.6232575626183916, "learning_rate": 5.029400037657517e-06, "loss": 0.5006, "step": 8056 }, { "epoch": 1.03, "grad_norm": 0.8902358097940759, "learning_rate": 5.028368468659368e-06, "loss": 0.5555, "step": 8057 }, { "epoch": 1.03, "grad_norm": 0.6793337767837564, "learning_rate": 5.027336898453665e-06, "loss": 0.5342, "step": 8058 }, { "epoch": 1.03, "grad_norm": 0.5939482657068546, "learning_rate": 5.026305327084318e-06, "loss": 0.5256, "step": 8059 }, { "epoch": 1.03, "grad_norm": 0.5916607362227726, "learning_rate": 5.025273754595237e-06, "loss": 0.4782, "step": 8060 }, { "epoch": 1.03, "grad_norm": 0.69822646169608, "learning_rate": 5.024242181030332e-06, "loss": 0.5008, "step": 8061 }, { "epoch": 1.03, "grad_norm": 0.752168127653501, "learning_rate": 5.023210606433516e-06, "loss": 0.5192, "step": 8062 }, { "epoch": 1.03, "grad_norm": 0.9104293343091803, "learning_rate": 5.022179030848698e-06, "loss": 0.4992, "step": 8063 }, { "epoch": 1.03, "grad_norm": 0.6680361321533085, "learning_rate": 5.02114745431979e-06, "loss": 0.4933, "step": 8064 }, { "epoch": 1.03, "grad_norm": 0.5704580273500565, "learning_rate": 5.020115876890702e-06, "loss": 0.4279, "step": 8065 }, { "epoch": 1.03, "grad_norm": 0.5712671736428463, "learning_rate": 5.019084298605346e-06, "loss": 0.448, "step": 8066 }, { "epoch": 1.03, "grad_norm": 0.818470240512177, "learning_rate": 5.018052719507632e-06, "loss": 0.5086, "step": 8067 }, { "epoch": 1.03, "grad_norm": 0.6867642127681672, "learning_rate": 5.0170211396414726e-06, "loss": 0.5067, "step": 8068 }, { "epoch": 1.03, "grad_norm": 0.8538337240545483, "learning_rate": 5.015989559050777e-06, "loss": 0.5239, "step": 8069 }, { "epoch": 1.03, "grad_norm": 0.7331282310856594, "learning_rate": 5.014957977779455e-06, "loss": 0.5354, "step": 8070 }, { "epoch": 1.03, "grad_norm": 0.8696451510505061, "learning_rate": 5.013926395871421e-06, "loss": 0.5801, "step": 8071 }, { "epoch": 1.03, "grad_norm": 0.7634593629744055, "learning_rate": 5.012894813370586e-06, "loss": 0.4981, "step": 8072 }, { "epoch": 1.03, "grad_norm": 0.5974869664160931, "learning_rate": 5.0118632303208595e-06, "loss": 0.4292, "step": 8073 }, { "epoch": 1.03, "grad_norm": 0.7414298773320958, "learning_rate": 5.0108316467661525e-06, "loss": 0.504, "step": 8074 }, { "epoch": 1.03, "grad_norm": 0.7498987872156508, "learning_rate": 5.0098000627503775e-06, "loss": 0.4294, "step": 8075 }, { "epoch": 1.03, "grad_norm": 0.9569737905633081, "learning_rate": 5.008768478317443e-06, "loss": 0.4443, "step": 8076 }, { "epoch": 1.03, "grad_norm": 0.6765939635966391, "learning_rate": 5.007736893511265e-06, "loss": 0.5097, "step": 8077 }, { "epoch": 1.03, "grad_norm": 0.6107574568996386, "learning_rate": 5.0067053083757515e-06, "loss": 0.4749, "step": 8078 }, { "epoch": 1.03, "grad_norm": 0.54428483955781, "learning_rate": 5.005673722954815e-06, "loss": 0.4284, "step": 8079 }, { "epoch": 1.03, "grad_norm": 0.577817880241673, "learning_rate": 5.004642137292365e-06, "loss": 0.5118, "step": 8080 }, { "epoch": 1.03, "grad_norm": 0.7605463303602441, "learning_rate": 5.003610551432315e-06, "loss": 0.4798, "step": 8081 }, { "epoch": 1.03, "grad_norm": 0.6804233083766142, "learning_rate": 5.002578965418575e-06, "loss": 0.536, "step": 8082 }, { "epoch": 1.03, "grad_norm": 0.7684880567027497, "learning_rate": 5.001547379295057e-06, "loss": 0.554, "step": 8083 }, { "epoch": 1.03, "grad_norm": 0.6047929180025671, "learning_rate": 5.000515793105671e-06, "loss": 0.4575, "step": 8084 }, { "epoch": 1.03, "grad_norm": 0.619301551951031, "learning_rate": 4.99948420689433e-06, "loss": 0.4825, "step": 8085 }, { "epoch": 1.03, "grad_norm": 0.7209483154140803, "learning_rate": 4.998452620704944e-06, "loss": 0.5081, "step": 8086 }, { "epoch": 1.03, "grad_norm": 0.7318865073274593, "learning_rate": 4.997421034581427e-06, "loss": 0.4698, "step": 8087 }, { "epoch": 1.03, "grad_norm": 0.7233770727932185, "learning_rate": 4.9963894485676865e-06, "loss": 0.5, "step": 8088 }, { "epoch": 1.03, "grad_norm": 0.7438788769480187, "learning_rate": 4.995357862707636e-06, "loss": 0.4994, "step": 8089 }, { "epoch": 1.03, "grad_norm": 0.6377103809597473, "learning_rate": 4.994326277045188e-06, "loss": 0.4386, "step": 8090 }, { "epoch": 1.03, "grad_norm": 0.6318927812348238, "learning_rate": 4.993294691624249e-06, "loss": 0.5101, "step": 8091 }, { "epoch": 1.03, "grad_norm": 0.8017299554073789, "learning_rate": 4.992263106488736e-06, "loss": 0.5517, "step": 8092 }, { "epoch": 1.03, "grad_norm": 0.7065999620554689, "learning_rate": 4.991231521682557e-06, "loss": 0.5332, "step": 8093 }, { "epoch": 1.03, "grad_norm": 0.6921646859919652, "learning_rate": 4.990199937249624e-06, "loss": 0.5236, "step": 8094 }, { "epoch": 1.03, "grad_norm": 0.6314241567649539, "learning_rate": 4.989168353233849e-06, "loss": 0.5216, "step": 8095 }, { "epoch": 1.03, "grad_norm": 0.7870602537592161, "learning_rate": 4.988136769679143e-06, "loss": 0.5748, "step": 8096 }, { "epoch": 1.03, "grad_norm": 0.8868492606936104, "learning_rate": 4.987105186629416e-06, "loss": 0.5435, "step": 8097 }, { "epoch": 1.03, "grad_norm": 0.5882130175175727, "learning_rate": 4.98607360412858e-06, "loss": 0.4647, "step": 8098 }, { "epoch": 1.03, "grad_norm": 0.6667964928118641, "learning_rate": 4.985042022220546e-06, "loss": 0.5304, "step": 8099 }, { "epoch": 1.03, "grad_norm": 0.7894263840401056, "learning_rate": 4.9840104409492264e-06, "loss": 0.5129, "step": 8100 }, { "epoch": 1.03, "grad_norm": 0.6598057151425882, "learning_rate": 4.982978860358531e-06, "loss": 0.5547, "step": 8101 }, { "epoch": 1.03, "grad_norm": 0.7475065550265066, "learning_rate": 4.98194728049237e-06, "loss": 0.4956, "step": 8102 }, { "epoch": 1.03, "grad_norm": 0.6108297537146267, "learning_rate": 4.9809157013946565e-06, "loss": 0.5047, "step": 8103 }, { "epoch": 1.03, "grad_norm": 0.8178384861045024, "learning_rate": 4.979884123109298e-06, "loss": 0.5725, "step": 8104 }, { "epoch": 1.03, "grad_norm": 0.5955650242377624, "learning_rate": 4.978852545680211e-06, "loss": 0.4829, "step": 8105 }, { "epoch": 1.03, "grad_norm": 0.5324535820881771, "learning_rate": 4.977820969151302e-06, "loss": 0.4561, "step": 8106 }, { "epoch": 1.03, "grad_norm": 0.5801369673208406, "learning_rate": 4.976789393566485e-06, "loss": 0.4415, "step": 8107 }, { "epoch": 1.03, "grad_norm": 0.5926204253813472, "learning_rate": 4.975757818969669e-06, "loss": 0.4245, "step": 8108 }, { "epoch": 1.03, "grad_norm": 0.7496313150571022, "learning_rate": 4.974726245404764e-06, "loss": 0.4558, "step": 8109 }, { "epoch": 1.03, "grad_norm": 0.6052156373739525, "learning_rate": 4.973694672915684e-06, "loss": 0.4904, "step": 8110 }, { "epoch": 1.03, "grad_norm": 0.668297626818539, "learning_rate": 4.972663101546337e-06, "loss": 0.5069, "step": 8111 }, { "epoch": 1.03, "grad_norm": 0.7076046746714018, "learning_rate": 4.9716315313406336e-06, "loss": 0.5435, "step": 8112 }, { "epoch": 1.03, "grad_norm": 0.783893601648489, "learning_rate": 4.970599962342486e-06, "loss": 0.537, "step": 8113 }, { "epoch": 1.03, "grad_norm": 0.6254790843764756, "learning_rate": 4.969568394595803e-06, "loss": 0.4416, "step": 8114 }, { "epoch": 1.03, "grad_norm": 1.0921393311376757, "learning_rate": 4.968536828144497e-06, "loss": 0.5504, "step": 8115 }, { "epoch": 1.03, "grad_norm": 0.7745381813738811, "learning_rate": 4.967505263032476e-06, "loss": 0.5733, "step": 8116 }, { "epoch": 1.03, "grad_norm": 0.7768241016381884, "learning_rate": 4.966473699303654e-06, "loss": 0.4973, "step": 8117 }, { "epoch": 1.03, "grad_norm": 0.5180697857474678, "learning_rate": 4.965442137001939e-06, "loss": 0.4182, "step": 8118 }, { "epoch": 1.03, "grad_norm": 0.6311950676772196, "learning_rate": 4.964410576171243e-06, "loss": 0.4924, "step": 8119 }, { "epoch": 1.03, "grad_norm": 0.7820784613143212, "learning_rate": 4.9633790168554735e-06, "loss": 0.4804, "step": 8120 }, { "epoch": 1.03, "grad_norm": 0.7414341015896759, "learning_rate": 4.962347459098542e-06, "loss": 0.5369, "step": 8121 }, { "epoch": 1.03, "grad_norm": 0.8201712140698587, "learning_rate": 4.96131590294436e-06, "loss": 0.4781, "step": 8122 }, { "epoch": 1.03, "grad_norm": 0.6621278595704478, "learning_rate": 4.960284348436837e-06, "loss": 0.478, "step": 8123 }, { "epoch": 1.03, "grad_norm": 0.7318612303570251, "learning_rate": 4.959252795619881e-06, "loss": 0.4795, "step": 8124 }, { "epoch": 1.04, "grad_norm": 0.6843648599542215, "learning_rate": 4.958221244537404e-06, "loss": 0.4949, "step": 8125 }, { "epoch": 1.04, "grad_norm": 0.8638883665995928, "learning_rate": 4.9571896952333145e-06, "loss": 0.5158, "step": 8126 }, { "epoch": 1.04, "grad_norm": 0.8735920736913573, "learning_rate": 4.956158147751523e-06, "loss": 0.4905, "step": 8127 }, { "epoch": 1.04, "grad_norm": 0.5738007049937406, "learning_rate": 4.955126602135938e-06, "loss": 0.4868, "step": 8128 }, { "epoch": 1.04, "grad_norm": 0.6219592933355601, "learning_rate": 4.954095058430471e-06, "loss": 0.4739, "step": 8129 }, { "epoch": 1.04, "grad_norm": 0.7957876962280005, "learning_rate": 4.953063516679031e-06, "loss": 0.5083, "step": 8130 }, { "epoch": 1.04, "grad_norm": 0.6263593027929184, "learning_rate": 4.952031976925528e-06, "loss": 0.469, "step": 8131 }, { "epoch": 1.04, "grad_norm": 0.6326247401095945, "learning_rate": 4.951000439213869e-06, "loss": 0.4568, "step": 8132 }, { "epoch": 1.04, "grad_norm": 0.5686671787418717, "learning_rate": 4.949968903587966e-06, "loss": 0.4107, "step": 8133 }, { "epoch": 1.04, "grad_norm": 0.6770448684844186, "learning_rate": 4.948937370091726e-06, "loss": 0.4586, "step": 8134 }, { "epoch": 1.04, "grad_norm": 0.6198756006943074, "learning_rate": 4.947905838769059e-06, "loss": 0.4489, "step": 8135 }, { "epoch": 1.04, "grad_norm": 0.588322239979068, "learning_rate": 4.946874309663875e-06, "loss": 0.4786, "step": 8136 }, { "epoch": 1.04, "grad_norm": 0.7281434699718707, "learning_rate": 4.945842782820081e-06, "loss": 0.4367, "step": 8137 }, { "epoch": 1.04, "grad_norm": 0.6656070348463957, "learning_rate": 4.944811258281586e-06, "loss": 0.4938, "step": 8138 }, { "epoch": 1.04, "grad_norm": 0.769071296361864, "learning_rate": 4.9437797360923005e-06, "loss": 0.5517, "step": 8139 }, { "epoch": 1.04, "grad_norm": 0.679566649812272, "learning_rate": 4.942748216296132e-06, "loss": 0.483, "step": 8140 }, { "epoch": 1.04, "grad_norm": 0.7801902138019788, "learning_rate": 4.941716698936987e-06, "loss": 0.5542, "step": 8141 }, { "epoch": 1.04, "grad_norm": 0.588084511685038, "learning_rate": 4.940685184058778e-06, "loss": 0.4881, "step": 8142 }, { "epoch": 1.04, "grad_norm": 0.7338863472434032, "learning_rate": 4.93965367170541e-06, "loss": 0.4935, "step": 8143 }, { "epoch": 1.04, "grad_norm": 0.5640269128996757, "learning_rate": 4.938622161920793e-06, "loss": 0.4243, "step": 8144 }, { "epoch": 1.04, "grad_norm": 0.6597540894818068, "learning_rate": 4.937590654748835e-06, "loss": 0.4653, "step": 8145 }, { "epoch": 1.04, "grad_norm": 0.6558563852646573, "learning_rate": 4.936559150233443e-06, "loss": 0.4711, "step": 8146 }, { "epoch": 1.04, "grad_norm": 0.6021679819291212, "learning_rate": 4.935527648418524e-06, "loss": 0.4561, "step": 8147 }, { "epoch": 1.04, "grad_norm": 0.774985474628311, "learning_rate": 4.9344961493479885e-06, "loss": 0.4616, "step": 8148 }, { "epoch": 1.04, "grad_norm": 0.790357340425759, "learning_rate": 4.9334646530657415e-06, "loss": 0.5451, "step": 8149 }, { "epoch": 1.04, "grad_norm": 0.6770652021077532, "learning_rate": 4.932433159615693e-06, "loss": 0.4564, "step": 8150 }, { "epoch": 1.04, "grad_norm": 0.6783212701251177, "learning_rate": 4.931401669041748e-06, "loss": 0.4789, "step": 8151 }, { "epoch": 1.04, "grad_norm": 0.796682217235123, "learning_rate": 4.9303701813878144e-06, "loss": 0.5437, "step": 8152 }, { "epoch": 1.04, "grad_norm": 1.0071332259830925, "learning_rate": 4.9293386966977994e-06, "loss": 0.5755, "step": 8153 }, { "epoch": 1.04, "grad_norm": 2.5310500434845555, "learning_rate": 4.928307215015611e-06, "loss": 0.5354, "step": 8154 }, { "epoch": 1.04, "grad_norm": 0.6400968540349614, "learning_rate": 4.9272757363851555e-06, "loss": 0.4971, "step": 8155 }, { "epoch": 1.04, "grad_norm": 0.7638101579938189, "learning_rate": 4.92624426085034e-06, "loss": 0.5345, "step": 8156 }, { "epoch": 1.04, "grad_norm": 0.6978492382260936, "learning_rate": 4.92521278845507e-06, "loss": 0.4906, "step": 8157 }, { "epoch": 1.04, "grad_norm": 0.7935373841448372, "learning_rate": 4.924181319243253e-06, "loss": 0.5321, "step": 8158 }, { "epoch": 1.04, "grad_norm": 0.8144939574626947, "learning_rate": 4.923149853258795e-06, "loss": 0.5151, "step": 8159 }, { "epoch": 1.04, "grad_norm": 0.5847324438157572, "learning_rate": 4.922118390545602e-06, "loss": 0.4286, "step": 8160 }, { "epoch": 1.04, "grad_norm": 0.5837197646959561, "learning_rate": 4.92108693114758e-06, "loss": 0.4735, "step": 8161 }, { "epoch": 1.04, "grad_norm": 0.6576688912176004, "learning_rate": 4.9200554751086354e-06, "loss": 0.472, "step": 8162 }, { "epoch": 1.04, "grad_norm": 0.7806261387489312, "learning_rate": 4.919024022472674e-06, "loss": 0.5474, "step": 8163 }, { "epoch": 1.04, "grad_norm": 0.7440678771464696, "learning_rate": 4.9179925732836e-06, "loss": 0.4915, "step": 8164 }, { "epoch": 1.04, "grad_norm": 0.6270807035856898, "learning_rate": 4.916961127585322e-06, "loss": 0.4168, "step": 8165 }, { "epoch": 1.04, "grad_norm": 0.6062933713288158, "learning_rate": 4.91592968542174e-06, "loss": 0.4253, "step": 8166 }, { "epoch": 1.04, "grad_norm": 0.8093407222557867, "learning_rate": 4.914898246836764e-06, "loss": 0.5136, "step": 8167 }, { "epoch": 1.04, "grad_norm": 0.6658720365941423, "learning_rate": 4.9138668118742994e-06, "loss": 0.5368, "step": 8168 }, { "epoch": 1.04, "grad_norm": 0.8033865420421944, "learning_rate": 4.912835380578249e-06, "loss": 0.5311, "step": 8169 }, { "epoch": 1.04, "grad_norm": 0.7002685475646482, "learning_rate": 4.911803952992518e-06, "loss": 0.496, "step": 8170 }, { "epoch": 1.04, "grad_norm": 0.6824569246723416, "learning_rate": 4.910772529161009e-06, "loss": 0.4896, "step": 8171 }, { "epoch": 1.04, "grad_norm": 0.6926253996415783, "learning_rate": 4.909741109127631e-06, "loss": 0.534, "step": 8172 }, { "epoch": 1.04, "grad_norm": 1.101069266180054, "learning_rate": 4.908709692936284e-06, "loss": 0.4662, "step": 8173 }, { "epoch": 1.04, "grad_norm": 0.7384209669025243, "learning_rate": 4.907678280630874e-06, "loss": 0.491, "step": 8174 }, { "epoch": 1.04, "grad_norm": 0.6977103742536973, "learning_rate": 4.906646872255305e-06, "loss": 0.5196, "step": 8175 }, { "epoch": 1.04, "grad_norm": 0.628473299941772, "learning_rate": 4.90561546785348e-06, "loss": 0.5175, "step": 8176 }, { "epoch": 1.04, "grad_norm": 0.8071292100474738, "learning_rate": 4.904584067469303e-06, "loss": 0.5382, "step": 8177 }, { "epoch": 1.04, "grad_norm": 0.7617304133126185, "learning_rate": 4.903552671146675e-06, "loss": 0.581, "step": 8178 }, { "epoch": 1.04, "grad_norm": 0.7305998341939557, "learning_rate": 4.902521278929504e-06, "loss": 0.4861, "step": 8179 }, { "epoch": 1.04, "grad_norm": 0.5931602093389495, "learning_rate": 4.90148989086169e-06, "loss": 0.4389, "step": 8180 }, { "epoch": 1.04, "grad_norm": 0.8133656696254687, "learning_rate": 4.900458506987137e-06, "loss": 0.5124, "step": 8181 }, { "epoch": 1.04, "grad_norm": 0.7196123470267825, "learning_rate": 4.899427127349747e-06, "loss": 0.4968, "step": 8182 }, { "epoch": 1.04, "grad_norm": 0.6756696081050716, "learning_rate": 4.898395751993423e-06, "loss": 0.4096, "step": 8183 }, { "epoch": 1.04, "grad_norm": 0.6306960210407471, "learning_rate": 4.897364380962068e-06, "loss": 0.4724, "step": 8184 }, { "epoch": 1.04, "grad_norm": 0.6040945195355162, "learning_rate": 4.8963330142995826e-06, "loss": 0.4662, "step": 8185 }, { "epoch": 1.04, "grad_norm": 0.7188606257204347, "learning_rate": 4.895301652049869e-06, "loss": 0.5029, "step": 8186 }, { "epoch": 1.04, "grad_norm": 0.6713362921139758, "learning_rate": 4.8942702942568305e-06, "loss": 0.456, "step": 8187 }, { "epoch": 1.04, "grad_norm": 0.7191827252342243, "learning_rate": 4.893238940964367e-06, "loss": 0.5407, "step": 8188 }, { "epoch": 1.04, "grad_norm": 0.7805356564484168, "learning_rate": 4.8922075922163804e-06, "loss": 0.5193, "step": 8189 }, { "epoch": 1.04, "grad_norm": 0.822079346452177, "learning_rate": 4.891176248056771e-06, "loss": 0.5152, "step": 8190 }, { "epoch": 1.04, "grad_norm": 0.7058846562952281, "learning_rate": 4.890144908529442e-06, "loss": 0.5226, "step": 8191 }, { "epoch": 1.04, "grad_norm": 0.64522121209394, "learning_rate": 4.889113573678294e-06, "loss": 0.5133, "step": 8192 }, { "epoch": 1.04, "grad_norm": 0.7002708813522056, "learning_rate": 4.888082243547226e-06, "loss": 0.5312, "step": 8193 }, { "epoch": 1.04, "grad_norm": 0.764951850368504, "learning_rate": 4.88705091818014e-06, "loss": 0.5119, "step": 8194 }, { "epoch": 1.04, "grad_norm": 0.76976036414379, "learning_rate": 4.8860195976209354e-06, "loss": 0.5069, "step": 8195 }, { "epoch": 1.04, "grad_norm": 0.8286293076818172, "learning_rate": 4.884988281913512e-06, "loss": 0.5364, "step": 8196 }, { "epoch": 1.04, "grad_norm": 0.7572316449150073, "learning_rate": 4.88395697110177e-06, "loss": 0.4912, "step": 8197 }, { "epoch": 1.04, "grad_norm": 0.7723155499331286, "learning_rate": 4.88292566522961e-06, "loss": 0.4772, "step": 8198 }, { "epoch": 1.04, "grad_norm": 0.6078057153870111, "learning_rate": 4.881894364340929e-06, "loss": 0.517, "step": 8199 }, { "epoch": 1.04, "grad_norm": 0.7053595877798743, "learning_rate": 4.880863068479628e-06, "loss": 0.5052, "step": 8200 }, { "epoch": 1.04, "grad_norm": 0.6593743158834144, "learning_rate": 4.879831777689606e-06, "loss": 0.4666, "step": 8201 }, { "epoch": 1.04, "grad_norm": 0.6361065123969063, "learning_rate": 4.87880049201476e-06, "loss": 0.4491, "step": 8202 }, { "epoch": 1.05, "grad_norm": 0.5716349027885537, "learning_rate": 4.877769211498989e-06, "loss": 0.4283, "step": 8203 }, { "epoch": 1.05, "grad_norm": 0.5720540039427114, "learning_rate": 4.876737936186193e-06, "loss": 0.4775, "step": 8204 }, { "epoch": 1.05, "grad_norm": 0.6165947747468568, "learning_rate": 4.875706666120269e-06, "loss": 0.4801, "step": 8205 }, { "epoch": 1.05, "grad_norm": 0.792961072769179, "learning_rate": 4.874675401345116e-06, "loss": 0.5268, "step": 8206 }, { "epoch": 1.05, "grad_norm": 0.815287152901661, "learning_rate": 4.8736441419046305e-06, "loss": 0.4854, "step": 8207 }, { "epoch": 1.05, "grad_norm": 0.9903397150894573, "learning_rate": 4.87261288784271e-06, "loss": 0.5335, "step": 8208 }, { "epoch": 1.05, "grad_norm": 0.7595958288211314, "learning_rate": 4.871581639203251e-06, "loss": 0.5459, "step": 8209 }, { "epoch": 1.05, "grad_norm": 0.6256464266848701, "learning_rate": 4.8705503960301515e-06, "loss": 0.4494, "step": 8210 }, { "epoch": 1.05, "grad_norm": 0.6687912366964109, "learning_rate": 4.869519158367308e-06, "loss": 0.4776, "step": 8211 }, { "epoch": 1.05, "grad_norm": 0.801744174634166, "learning_rate": 4.8684879262586175e-06, "loss": 0.5669, "step": 8212 }, { "epoch": 1.05, "grad_norm": 0.7645578124566696, "learning_rate": 4.867456699747975e-06, "loss": 0.5607, "step": 8213 }, { "epoch": 1.05, "grad_norm": 0.7216416974605512, "learning_rate": 4.866425478879279e-06, "loss": 0.5119, "step": 8214 }, { "epoch": 1.05, "grad_norm": 0.7459689271359239, "learning_rate": 4.86539426369642e-06, "loss": 0.4735, "step": 8215 }, { "epoch": 1.05, "grad_norm": 0.6405112781348862, "learning_rate": 4.8643630542433005e-06, "loss": 0.5296, "step": 8216 }, { "epoch": 1.05, "grad_norm": 0.8086311427381001, "learning_rate": 4.863331850563811e-06, "loss": 0.5644, "step": 8217 }, { "epoch": 1.05, "grad_norm": 1.0809850199777922, "learning_rate": 4.8623006527018475e-06, "loss": 0.5506, "step": 8218 }, { "epoch": 1.05, "grad_norm": 0.8355964877091129, "learning_rate": 4.861269460701306e-06, "loss": 0.4848, "step": 8219 }, { "epoch": 1.05, "grad_norm": 0.6083604806636115, "learning_rate": 4.86023827460608e-06, "loss": 0.4785, "step": 8220 }, { "epoch": 1.05, "grad_norm": 0.8007060967062233, "learning_rate": 4.859207094460065e-06, "loss": 0.5167, "step": 8221 }, { "epoch": 1.05, "grad_norm": 0.8139772792869403, "learning_rate": 4.858175920307153e-06, "loss": 0.5315, "step": 8222 }, { "epoch": 1.05, "grad_norm": 0.6990414904321893, "learning_rate": 4.857144752191238e-06, "loss": 0.5069, "step": 8223 }, { "epoch": 1.05, "grad_norm": 0.7490374445137454, "learning_rate": 4.856113590156216e-06, "loss": 0.5491, "step": 8224 }, { "epoch": 1.05, "grad_norm": 0.7928538358900609, "learning_rate": 4.855082434245978e-06, "loss": 0.5334, "step": 8225 }, { "epoch": 1.05, "grad_norm": 0.664875956858229, "learning_rate": 4.854051284504418e-06, "loss": 0.5, "step": 8226 }, { "epoch": 1.05, "grad_norm": 0.8205903752505009, "learning_rate": 4.8530201409754285e-06, "loss": 0.5423, "step": 8227 }, { "epoch": 1.05, "grad_norm": 0.8122183398227888, "learning_rate": 4.8519890037029e-06, "loss": 0.5103, "step": 8228 }, { "epoch": 1.05, "grad_norm": 0.6594882528654233, "learning_rate": 4.850957872730728e-06, "loss": 0.4827, "step": 8229 }, { "epoch": 1.05, "grad_norm": 0.6273417028965672, "learning_rate": 4.849926748102803e-06, "loss": 0.4645, "step": 8230 }, { "epoch": 1.05, "grad_norm": 0.6912005844276828, "learning_rate": 4.848895629863018e-06, "loss": 0.4743, "step": 8231 }, { "epoch": 1.05, "grad_norm": 0.7023085892697838, "learning_rate": 4.847864518055261e-06, "loss": 0.4735, "step": 8232 }, { "epoch": 1.05, "grad_norm": 0.5654344139350759, "learning_rate": 4.8468334127234275e-06, "loss": 0.4346, "step": 8233 }, { "epoch": 1.05, "grad_norm": 0.6681639496497829, "learning_rate": 4.845802313911405e-06, "loss": 0.4418, "step": 8234 }, { "epoch": 1.05, "grad_norm": 0.6608505034354452, "learning_rate": 4.844771221663086e-06, "loss": 0.4885, "step": 8235 }, { "epoch": 1.05, "grad_norm": 0.6250645790722317, "learning_rate": 4.843740136022359e-06, "loss": 0.5025, "step": 8236 }, { "epoch": 1.05, "grad_norm": 0.7429447805268162, "learning_rate": 4.842709057033116e-06, "loss": 0.4715, "step": 8237 }, { "epoch": 1.05, "grad_norm": 0.5633102581255295, "learning_rate": 4.841677984739245e-06, "loss": 0.4234, "step": 8238 }, { "epoch": 1.05, "grad_norm": 0.5980258866552804, "learning_rate": 4.8406469191846374e-06, "loss": 0.4852, "step": 8239 }, { "epoch": 1.05, "grad_norm": 0.7968220275413964, "learning_rate": 4.839615860413178e-06, "loss": 0.5105, "step": 8240 }, { "epoch": 1.05, "grad_norm": 0.5617390826528584, "learning_rate": 4.838584808468761e-06, "loss": 0.4465, "step": 8241 }, { "epoch": 1.05, "grad_norm": 0.6124345282934126, "learning_rate": 4.837553763395274e-06, "loss": 0.4664, "step": 8242 }, { "epoch": 1.05, "grad_norm": 0.529315244232884, "learning_rate": 4.836522725236604e-06, "loss": 0.4149, "step": 8243 }, { "epoch": 1.05, "grad_norm": 0.778035947403778, "learning_rate": 4.835491694036638e-06, "loss": 0.4814, "step": 8244 }, { "epoch": 1.05, "grad_norm": 0.6791867705029339, "learning_rate": 4.834460669839266e-06, "loss": 0.5016, "step": 8245 }, { "epoch": 1.05, "grad_norm": 0.5673865733446531, "learning_rate": 4.833429652688374e-06, "loss": 0.4406, "step": 8246 }, { "epoch": 1.05, "grad_norm": 0.53186758935031, "learning_rate": 4.832398642627849e-06, "loss": 0.4186, "step": 8247 }, { "epoch": 1.05, "grad_norm": 0.6744997106497785, "learning_rate": 4.831367639701579e-06, "loss": 0.4676, "step": 8248 }, { "epoch": 1.05, "grad_norm": 0.7006821578003772, "learning_rate": 4.830336643953449e-06, "loss": 0.5783, "step": 8249 }, { "epoch": 1.05, "grad_norm": 0.7494961788681345, "learning_rate": 4.829305655427346e-06, "loss": 0.4503, "step": 8250 }, { "epoch": 1.05, "grad_norm": 0.7960997699671731, "learning_rate": 4.828274674167156e-06, "loss": 0.5512, "step": 8251 }, { "epoch": 1.05, "grad_norm": 0.7252706700449726, "learning_rate": 4.827243700216762e-06, "loss": 0.5438, "step": 8252 }, { "epoch": 1.05, "grad_norm": 0.6124893123667524, "learning_rate": 4.826212733620054e-06, "loss": 0.4649, "step": 8253 }, { "epoch": 1.05, "grad_norm": 0.6201293585842711, "learning_rate": 4.825181774420915e-06, "loss": 0.4471, "step": 8254 }, { "epoch": 1.05, "grad_norm": 0.6010911318106227, "learning_rate": 4.8241508226632285e-06, "loss": 0.4478, "step": 8255 }, { "epoch": 1.05, "grad_norm": 0.6629589461654971, "learning_rate": 4.82311987839088e-06, "loss": 0.468, "step": 8256 }, { "epoch": 1.05, "grad_norm": 0.7122458328776413, "learning_rate": 4.822088941647753e-06, "loss": 0.4981, "step": 8257 }, { "epoch": 1.05, "grad_norm": 0.7225105454312586, "learning_rate": 4.821058012477731e-06, "loss": 0.4917, "step": 8258 }, { "epoch": 1.05, "grad_norm": 0.6356762569082717, "learning_rate": 4.820027090924698e-06, "loss": 0.5199, "step": 8259 }, { "epoch": 1.05, "grad_norm": 0.7390531461066636, "learning_rate": 4.818996177032536e-06, "loss": 0.4906, "step": 8260 }, { "epoch": 1.05, "grad_norm": 0.6715414809108625, "learning_rate": 4.817965270845129e-06, "loss": 0.4907, "step": 8261 }, { "epoch": 1.05, "grad_norm": 0.7872491401963878, "learning_rate": 4.8169343724063574e-06, "loss": 0.521, "step": 8262 }, { "epoch": 1.05, "grad_norm": 0.8312153903219698, "learning_rate": 4.8159034817601055e-06, "loss": 0.5818, "step": 8263 }, { "epoch": 1.05, "grad_norm": 0.7546787280603171, "learning_rate": 4.814872598950255e-06, "loss": 0.5496, "step": 8264 }, { "epoch": 1.05, "grad_norm": 0.7716455672093985, "learning_rate": 4.813841724020684e-06, "loss": 0.5266, "step": 8265 }, { "epoch": 1.05, "grad_norm": 0.7240966792837004, "learning_rate": 4.812810857015278e-06, "loss": 0.5492, "step": 8266 }, { "epoch": 1.05, "grad_norm": 0.6694567122072812, "learning_rate": 4.811779997977914e-06, "loss": 0.4808, "step": 8267 }, { "epoch": 1.05, "grad_norm": 0.8307680593191434, "learning_rate": 4.8107491469524756e-06, "loss": 0.5309, "step": 8268 }, { "epoch": 1.05, "grad_norm": 0.6751536402282458, "learning_rate": 4.809718303982841e-06, "loss": 0.4805, "step": 8269 }, { "epoch": 1.05, "grad_norm": 0.6384524355478715, "learning_rate": 4.8086874691128896e-06, "loss": 0.4779, "step": 8270 }, { "epoch": 1.05, "grad_norm": 0.8092827043226495, "learning_rate": 4.807656642386501e-06, "loss": 0.5593, "step": 8271 }, { "epoch": 1.05, "grad_norm": 0.6841599902671923, "learning_rate": 4.806625823847555e-06, "loss": 0.4846, "step": 8272 }, { "epoch": 1.05, "grad_norm": 0.6388523843509164, "learning_rate": 4.80559501353993e-06, "loss": 0.4693, "step": 8273 }, { "epoch": 1.05, "grad_norm": 0.686568112462965, "learning_rate": 4.804564211507504e-06, "loss": 0.4866, "step": 8274 }, { "epoch": 1.05, "grad_norm": 0.7377607432941956, "learning_rate": 4.803533417794155e-06, "loss": 0.5379, "step": 8275 }, { "epoch": 1.05, "grad_norm": 0.6125103250502069, "learning_rate": 4.80250263244376e-06, "loss": 0.45, "step": 8276 }, { "epoch": 1.05, "grad_norm": 0.612603928040167, "learning_rate": 4.8014718555001964e-06, "loss": 0.5093, "step": 8277 }, { "epoch": 1.05, "grad_norm": 0.6598029494031642, "learning_rate": 4.800441087007342e-06, "loss": 0.5087, "step": 8278 }, { "epoch": 1.05, "grad_norm": 0.6311228028854338, "learning_rate": 4.7994103270090735e-06, "loss": 0.5057, "step": 8279 }, { "epoch": 1.05, "grad_norm": 0.7268557525529523, "learning_rate": 4.798379575549266e-06, "loss": 0.5394, "step": 8280 }, { "epoch": 1.05, "grad_norm": 0.5701984099904688, "learning_rate": 4.797348832671796e-06, "loss": 0.4342, "step": 8281 }, { "epoch": 1.06, "grad_norm": 0.6421093650108104, "learning_rate": 4.796318098420538e-06, "loss": 0.4723, "step": 8282 }, { "epoch": 1.06, "grad_norm": 0.8177789621404639, "learning_rate": 4.795287372839368e-06, "loss": 0.5494, "step": 8283 }, { "epoch": 1.06, "grad_norm": 0.9362342834889982, "learning_rate": 4.794256655972161e-06, "loss": 0.5064, "step": 8284 }, { "epoch": 1.06, "grad_norm": 0.7045524933158974, "learning_rate": 4.79322594786279e-06, "loss": 0.509, "step": 8285 }, { "epoch": 1.06, "grad_norm": 0.8361017931108989, "learning_rate": 4.7921952485551295e-06, "loss": 0.5002, "step": 8286 }, { "epoch": 1.06, "grad_norm": 0.6032002726730082, "learning_rate": 4.791164558093054e-06, "loss": 0.4493, "step": 8287 }, { "epoch": 1.06, "grad_norm": 0.6165107148119306, "learning_rate": 4.790133876520435e-06, "loss": 0.4552, "step": 8288 }, { "epoch": 1.06, "grad_norm": 0.721207995116974, "learning_rate": 4.789103203881147e-06, "loss": 0.5762, "step": 8289 }, { "epoch": 1.06, "grad_norm": 0.7628121153743485, "learning_rate": 4.7880725402190595e-06, "loss": 0.5021, "step": 8290 }, { "epoch": 1.06, "grad_norm": 0.8310262869235524, "learning_rate": 4.787041885578048e-06, "loss": 0.5734, "step": 8291 }, { "epoch": 1.06, "grad_norm": 0.774089249774509, "learning_rate": 4.7860112400019834e-06, "loss": 0.5021, "step": 8292 }, { "epoch": 1.06, "grad_norm": 0.6050785921716553, "learning_rate": 4.784980603534737e-06, "loss": 0.4829, "step": 8293 }, { "epoch": 1.06, "grad_norm": 0.6850084566210694, "learning_rate": 4.783949976220179e-06, "loss": 0.5018, "step": 8294 }, { "epoch": 1.06, "grad_norm": 1.0573215637059092, "learning_rate": 4.782919358102179e-06, "loss": 0.4795, "step": 8295 }, { "epoch": 1.06, "grad_norm": 0.6547241669697361, "learning_rate": 4.78188874922461e-06, "loss": 0.4866, "step": 8296 }, { "epoch": 1.06, "grad_norm": 0.6478676320357795, "learning_rate": 4.7808581496313385e-06, "loss": 0.4525, "step": 8297 }, { "epoch": 1.06, "grad_norm": 0.6843452654307315, "learning_rate": 4.779827559366236e-06, "loss": 0.4987, "step": 8298 }, { "epoch": 1.06, "grad_norm": 0.749033531854335, "learning_rate": 4.778796978473171e-06, "loss": 0.4797, "step": 8299 }, { "epoch": 1.06, "grad_norm": 0.8176461849848516, "learning_rate": 4.777766406996011e-06, "loss": 0.5129, "step": 8300 }, { "epoch": 1.06, "grad_norm": 0.5705261620536343, "learning_rate": 4.776735844978626e-06, "loss": 0.4314, "step": 8301 }, { "epoch": 1.06, "grad_norm": 0.6700612230120924, "learning_rate": 4.77570529246488e-06, "loss": 0.4941, "step": 8302 }, { "epoch": 1.06, "grad_norm": 0.6228444338642064, "learning_rate": 4.774674749498645e-06, "loss": 0.5081, "step": 8303 }, { "epoch": 1.06, "grad_norm": 0.7172664407984548, "learning_rate": 4.773644216123785e-06, "loss": 0.5206, "step": 8304 }, { "epoch": 1.06, "grad_norm": 0.7464573812938363, "learning_rate": 4.772613692384168e-06, "loss": 0.5402, "step": 8305 }, { "epoch": 1.06, "grad_norm": 0.7071676505134432, "learning_rate": 4.77158317832366e-06, "loss": 0.5094, "step": 8306 }, { "epoch": 1.06, "grad_norm": 0.5489632515712463, "learning_rate": 4.770552673986125e-06, "loss": 0.4181, "step": 8307 }, { "epoch": 1.06, "grad_norm": 0.7151312126475369, "learning_rate": 4.7695221794154315e-06, "loss": 0.5255, "step": 8308 }, { "epoch": 1.06, "grad_norm": 0.8168589284014309, "learning_rate": 4.768491694655441e-06, "loss": 0.5374, "step": 8309 }, { "epoch": 1.06, "grad_norm": 0.7398942962946595, "learning_rate": 4.7674612197500194e-06, "loss": 0.5418, "step": 8310 }, { "epoch": 1.06, "grad_norm": 0.6906132880351743, "learning_rate": 4.76643075474303e-06, "loss": 0.5171, "step": 8311 }, { "epoch": 1.06, "grad_norm": 0.7496395408570655, "learning_rate": 4.7654002996783375e-06, "loss": 0.4947, "step": 8312 }, { "epoch": 1.06, "grad_norm": 0.6958341591522413, "learning_rate": 4.764369854599805e-06, "loss": 0.5191, "step": 8313 }, { "epoch": 1.06, "grad_norm": 0.7685756429343458, "learning_rate": 4.763339419551292e-06, "loss": 0.5214, "step": 8314 }, { "epoch": 1.06, "grad_norm": 0.6372399625193196, "learning_rate": 4.762308994576666e-06, "loss": 0.4885, "step": 8315 }, { "epoch": 1.06, "grad_norm": 0.6102657045220327, "learning_rate": 4.7612785797197865e-06, "loss": 0.4684, "step": 8316 }, { "epoch": 1.06, "grad_norm": 0.6579304653084039, "learning_rate": 4.760248175024515e-06, "loss": 0.4952, "step": 8317 }, { "epoch": 1.06, "grad_norm": 0.7343753218958654, "learning_rate": 4.759217780534713e-06, "loss": 0.4803, "step": 8318 }, { "epoch": 1.06, "grad_norm": 0.7741027393340881, "learning_rate": 4.758187396294241e-06, "loss": 0.529, "step": 8319 }, { "epoch": 1.06, "grad_norm": 0.7667035487416406, "learning_rate": 4.7571570223469575e-06, "loss": 0.5363, "step": 8320 }, { "epoch": 1.06, "grad_norm": 0.6107481436895474, "learning_rate": 4.756126658736725e-06, "loss": 0.4528, "step": 8321 }, { "epoch": 1.06, "grad_norm": 0.6737471703064847, "learning_rate": 4.7550963055074e-06, "loss": 0.4782, "step": 8322 }, { "epoch": 1.06, "grad_norm": 0.8540866683262708, "learning_rate": 4.754065962702843e-06, "loss": 0.532, "step": 8323 }, { "epoch": 1.06, "grad_norm": 1.1902404904276669, "learning_rate": 4.753035630366913e-06, "loss": 0.5155, "step": 8324 }, { "epoch": 1.06, "grad_norm": 0.5897317747309352, "learning_rate": 4.752005308543466e-06, "loss": 0.4303, "step": 8325 }, { "epoch": 1.06, "grad_norm": 0.7012971327486279, "learning_rate": 4.750974997276361e-06, "loss": 0.4871, "step": 8326 }, { "epoch": 1.06, "grad_norm": 0.7640488877322428, "learning_rate": 4.749944696609453e-06, "loss": 0.541, "step": 8327 }, { "epoch": 1.06, "grad_norm": 0.7737930288919439, "learning_rate": 4.748914406586602e-06, "loss": 0.5053, "step": 8328 }, { "epoch": 1.06, "grad_norm": 0.5911253142058805, "learning_rate": 4.7478841272516616e-06, "loss": 0.4478, "step": 8329 }, { "epoch": 1.06, "grad_norm": 0.5795290210295466, "learning_rate": 4.746853858648489e-06, "loss": 0.4683, "step": 8330 }, { "epoch": 1.06, "grad_norm": 0.6489249567019832, "learning_rate": 4.745823600820939e-06, "loss": 0.4687, "step": 8331 }, { "epoch": 1.06, "grad_norm": 0.7705164626644723, "learning_rate": 4.7447933538128634e-06, "loss": 0.5312, "step": 8332 }, { "epoch": 1.06, "grad_norm": 0.6608120498356257, "learning_rate": 4.743763117668121e-06, "loss": 0.5045, "step": 8333 }, { "epoch": 1.06, "grad_norm": 0.7930373929415717, "learning_rate": 4.742732892430565e-06, "loss": 0.5384, "step": 8334 }, { "epoch": 1.06, "grad_norm": 0.7543329925328313, "learning_rate": 4.741702678144047e-06, "loss": 0.5222, "step": 8335 }, { "epoch": 1.06, "grad_norm": 0.5520664241237638, "learning_rate": 4.74067247485242e-06, "loss": 0.4485, "step": 8336 }, { "epoch": 1.06, "grad_norm": 0.7122324134902953, "learning_rate": 4.739642282599538e-06, "loss": 0.4918, "step": 8337 }, { "epoch": 1.06, "grad_norm": 0.680478604032049, "learning_rate": 4.7386121014292505e-06, "loss": 0.5085, "step": 8338 }, { "epoch": 1.06, "grad_norm": 0.5600107710486806, "learning_rate": 4.737581931385411e-06, "loss": 0.4584, "step": 8339 }, { "epoch": 1.06, "grad_norm": 0.6896580366720484, "learning_rate": 4.73655177251187e-06, "loss": 0.4906, "step": 8340 }, { "epoch": 1.06, "grad_norm": 0.963977787932417, "learning_rate": 4.73552162485248e-06, "loss": 0.5213, "step": 8341 }, { "epoch": 1.06, "grad_norm": 0.7043908635505579, "learning_rate": 4.734491488451087e-06, "loss": 0.4862, "step": 8342 }, { "epoch": 1.06, "grad_norm": 0.6881430654845729, "learning_rate": 4.733461363351544e-06, "loss": 0.5056, "step": 8343 }, { "epoch": 1.06, "grad_norm": 0.7424707899379075, "learning_rate": 4.7324312495976994e-06, "loss": 0.5498, "step": 8344 }, { "epoch": 1.06, "grad_norm": 0.6172766748002575, "learning_rate": 4.731401147233402e-06, "loss": 0.471, "step": 8345 }, { "epoch": 1.06, "grad_norm": 0.6407102122432393, "learning_rate": 4.730371056302498e-06, "loss": 0.4529, "step": 8346 }, { "epoch": 1.06, "grad_norm": 0.6719462297994628, "learning_rate": 4.7293409768488365e-06, "loss": 0.4964, "step": 8347 }, { "epoch": 1.06, "grad_norm": 0.6584934755649497, "learning_rate": 4.728310908916266e-06, "loss": 0.4518, "step": 8348 }, { "epoch": 1.06, "grad_norm": 0.662125686194116, "learning_rate": 4.727280852548632e-06, "loss": 0.4953, "step": 8349 }, { "epoch": 1.06, "grad_norm": 0.6914782045059399, "learning_rate": 4.726250807789779e-06, "loss": 0.539, "step": 8350 }, { "epoch": 1.06, "grad_norm": 0.6847308364637184, "learning_rate": 4.725220774683555e-06, "loss": 0.4303, "step": 8351 }, { "epoch": 1.06, "grad_norm": 0.598398400830368, "learning_rate": 4.724190753273803e-06, "loss": 0.4903, "step": 8352 }, { "epoch": 1.06, "grad_norm": 0.7146368773370222, "learning_rate": 4.723160743604371e-06, "loss": 0.492, "step": 8353 }, { "epoch": 1.06, "grad_norm": 0.686039990221673, "learning_rate": 4.7221307457191014e-06, "loss": 0.4945, "step": 8354 }, { "epoch": 1.06, "grad_norm": 0.6349715579352723, "learning_rate": 4.721100759661838e-06, "loss": 0.5377, "step": 8355 }, { "epoch": 1.06, "grad_norm": 0.7781684646665405, "learning_rate": 4.720070785476424e-06, "loss": 0.5561, "step": 8356 }, { "epoch": 1.06, "grad_norm": 0.8482692988535854, "learning_rate": 4.719040823206702e-06, "loss": 0.5792, "step": 8357 }, { "epoch": 1.06, "grad_norm": 0.7842530178277606, "learning_rate": 4.718010872896515e-06, "loss": 0.5251, "step": 8358 }, { "epoch": 1.06, "grad_norm": 0.7524235995930882, "learning_rate": 4.716980934589703e-06, "loss": 0.5123, "step": 8359 }, { "epoch": 1.07, "grad_norm": 0.8094302828328926, "learning_rate": 4.7159510083301095e-06, "loss": 0.512, "step": 8360 }, { "epoch": 1.07, "grad_norm": 0.5494737537226451, "learning_rate": 4.714921094161573e-06, "loss": 0.4264, "step": 8361 }, { "epoch": 1.07, "grad_norm": 0.7242920027653952, "learning_rate": 4.713891192127935e-06, "loss": 0.538, "step": 8362 }, { "epoch": 1.07, "grad_norm": 0.702317673340286, "learning_rate": 4.712861302273034e-06, "loss": 0.4911, "step": 8363 }, { "epoch": 1.07, "grad_norm": 0.6389843628300853, "learning_rate": 4.7118314246407084e-06, "loss": 0.4368, "step": 8364 }, { "epoch": 1.07, "grad_norm": 0.6049971019201188, "learning_rate": 4.7108015592748005e-06, "loss": 0.4483, "step": 8365 }, { "epoch": 1.07, "grad_norm": 0.6903272201243086, "learning_rate": 4.709771706219145e-06, "loss": 0.4091, "step": 8366 }, { "epoch": 1.07, "grad_norm": 0.7160418672888511, "learning_rate": 4.708741865517581e-06, "loss": 0.5074, "step": 8367 }, { "epoch": 1.07, "grad_norm": 0.6924792251940831, "learning_rate": 4.7077120372139455e-06, "loss": 0.4431, "step": 8368 }, { "epoch": 1.07, "grad_norm": 0.6828736714010408, "learning_rate": 4.706682221352074e-06, "loss": 0.4541, "step": 8369 }, { "epoch": 1.07, "grad_norm": 0.7528016280901391, "learning_rate": 4.705652417975803e-06, "loss": 0.5315, "step": 8370 }, { "epoch": 1.07, "grad_norm": 0.6899681482329726, "learning_rate": 4.704622627128969e-06, "loss": 0.541, "step": 8371 }, { "epoch": 1.07, "grad_norm": 0.8629331194423644, "learning_rate": 4.703592848855405e-06, "loss": 0.6034, "step": 8372 }, { "epoch": 1.07, "grad_norm": 0.765164791092945, "learning_rate": 4.7025630831989465e-06, "loss": 0.5053, "step": 8373 }, { "epoch": 1.07, "grad_norm": 0.6877677701275337, "learning_rate": 4.701533330203427e-06, "loss": 0.5148, "step": 8374 }, { "epoch": 1.07, "grad_norm": 0.7151067895192951, "learning_rate": 4.70050358991268e-06, "loss": 0.5062, "step": 8375 }, { "epoch": 1.07, "grad_norm": 0.7062122950474459, "learning_rate": 4.699473862370535e-06, "loss": 0.4794, "step": 8376 }, { "epoch": 1.07, "grad_norm": 0.5964550116294689, "learning_rate": 4.698444147620831e-06, "loss": 0.451, "step": 8377 }, { "epoch": 1.07, "grad_norm": 0.5786820849146606, "learning_rate": 4.697414445707395e-06, "loss": 0.4311, "step": 8378 }, { "epoch": 1.07, "grad_norm": 0.6020010219440008, "learning_rate": 4.696384756674059e-06, "loss": 0.519, "step": 8379 }, { "epoch": 1.07, "grad_norm": 0.7530596084436874, "learning_rate": 4.695355080564655e-06, "loss": 0.523, "step": 8380 }, { "epoch": 1.07, "grad_norm": 0.6512234287017661, "learning_rate": 4.69432541742301e-06, "loss": 0.4924, "step": 8381 }, { "epoch": 1.07, "grad_norm": 0.7564569630147391, "learning_rate": 4.6932957672929565e-06, "loss": 0.5666, "step": 8382 }, { "epoch": 1.07, "grad_norm": 1.384880958266251, "learning_rate": 4.692266130218322e-06, "loss": 0.4818, "step": 8383 }, { "epoch": 1.07, "grad_norm": 0.6994452017916899, "learning_rate": 4.6912365062429334e-06, "loss": 0.4573, "step": 8384 }, { "epoch": 1.07, "grad_norm": 0.6832932871007706, "learning_rate": 4.690206895410622e-06, "loss": 0.4641, "step": 8385 }, { "epoch": 1.07, "grad_norm": 0.8228968802336903, "learning_rate": 4.689177297765212e-06, "loss": 0.5673, "step": 8386 }, { "epoch": 1.07, "grad_norm": 0.754176995111916, "learning_rate": 4.688147713350532e-06, "loss": 0.5774, "step": 8387 }, { "epoch": 1.07, "grad_norm": 0.8485384255773295, "learning_rate": 4.687118142210407e-06, "loss": 0.5713, "step": 8388 }, { "epoch": 1.07, "grad_norm": 0.9218049433550063, "learning_rate": 4.686088584388661e-06, "loss": 0.5258, "step": 8389 }, { "epoch": 1.07, "grad_norm": 0.7431226167112078, "learning_rate": 4.685059039929123e-06, "loss": 0.5334, "step": 8390 }, { "epoch": 1.07, "grad_norm": 0.7642740832397854, "learning_rate": 4.684029508875615e-06, "loss": 0.5194, "step": 8391 }, { "epoch": 1.07, "grad_norm": 0.6598599128505827, "learning_rate": 4.682999991271961e-06, "loss": 0.4935, "step": 8392 }, { "epoch": 1.07, "grad_norm": 0.6323138682990631, "learning_rate": 4.681970487161984e-06, "loss": 0.496, "step": 8393 }, { "epoch": 1.07, "grad_norm": 1.4095994915849337, "learning_rate": 4.680940996589506e-06, "loss": 0.5157, "step": 8394 }, { "epoch": 1.07, "grad_norm": 0.7512926439974584, "learning_rate": 4.6799115195983515e-06, "loss": 0.48, "step": 8395 }, { "epoch": 1.07, "grad_norm": 0.7554170976018972, "learning_rate": 4.678882056232341e-06, "loss": 0.5167, "step": 8396 }, { "epoch": 1.07, "grad_norm": 0.6039114237446409, "learning_rate": 4.677852606535295e-06, "loss": 0.4556, "step": 8397 }, { "epoch": 1.07, "grad_norm": 0.5463591241004426, "learning_rate": 4.6768231705510346e-06, "loss": 0.4468, "step": 8398 }, { "epoch": 1.07, "grad_norm": 0.6700800719790695, "learning_rate": 4.675793748323378e-06, "loss": 0.4829, "step": 8399 }, { "epoch": 1.07, "grad_norm": 0.5619466381369013, "learning_rate": 4.6747643398961465e-06, "loss": 0.4192, "step": 8400 }, { "epoch": 1.07, "grad_norm": 0.660468492810452, "learning_rate": 4.6737349453131556e-06, "loss": 0.4634, "step": 8401 }, { "epoch": 1.07, "grad_norm": 0.7212004286905599, "learning_rate": 4.672705564618228e-06, "loss": 0.5062, "step": 8402 }, { "epoch": 1.07, "grad_norm": 0.571651013072029, "learning_rate": 4.671676197855178e-06, "loss": 0.4592, "step": 8403 }, { "epoch": 1.07, "grad_norm": 0.6756887980190536, "learning_rate": 4.670646845067823e-06, "loss": 0.494, "step": 8404 }, { "epoch": 1.07, "grad_norm": 0.7638395564295171, "learning_rate": 4.669617506299979e-06, "loss": 0.5114, "step": 8405 }, { "epoch": 1.07, "grad_norm": 0.7078517964132525, "learning_rate": 4.668588181595462e-06, "loss": 0.4377, "step": 8406 }, { "epoch": 1.07, "grad_norm": 0.8162889833163972, "learning_rate": 4.667558870998088e-06, "loss": 0.5008, "step": 8407 }, { "epoch": 1.07, "grad_norm": 0.6307686552880518, "learning_rate": 4.66652957455167e-06, "loss": 0.5279, "step": 8408 }, { "epoch": 1.07, "grad_norm": 0.8860161005715478, "learning_rate": 4.665500292300022e-06, "loss": 0.5348, "step": 8409 }, { "epoch": 1.07, "grad_norm": 0.753833515737956, "learning_rate": 4.6644710242869586e-06, "loss": 0.5833, "step": 8410 }, { "epoch": 1.07, "grad_norm": 0.7902452914431704, "learning_rate": 4.66344177055629e-06, "loss": 0.5909, "step": 8411 }, { "epoch": 1.07, "grad_norm": 0.8910486117109179, "learning_rate": 4.662412531151831e-06, "loss": 0.5373, "step": 8412 }, { "epoch": 1.07, "grad_norm": 0.7628166571938368, "learning_rate": 4.661383306117392e-06, "loss": 0.5369, "step": 8413 }, { "epoch": 1.07, "grad_norm": 0.8626640001914128, "learning_rate": 4.66035409549678e-06, "loss": 0.5132, "step": 8414 }, { "epoch": 1.07, "grad_norm": 0.6707463882512237, "learning_rate": 4.6593248993338125e-06, "loss": 0.5181, "step": 8415 }, { "epoch": 1.07, "grad_norm": 0.7840943731201608, "learning_rate": 4.658295717672295e-06, "loss": 0.5049, "step": 8416 }, { "epoch": 1.07, "grad_norm": 0.7362141479085723, "learning_rate": 4.657266550556036e-06, "loss": 0.5073, "step": 8417 }, { "epoch": 1.07, "grad_norm": 0.7142310809842124, "learning_rate": 4.656237398028846e-06, "loss": 0.5513, "step": 8418 }, { "epoch": 1.07, "grad_norm": 0.7563563098539968, "learning_rate": 4.65520826013453e-06, "loss": 0.5116, "step": 8419 }, { "epoch": 1.07, "grad_norm": 0.643873280231301, "learning_rate": 4.654179136916898e-06, "loss": 0.4757, "step": 8420 }, { "epoch": 1.07, "grad_norm": 0.6268166627092473, "learning_rate": 4.653150028419754e-06, "loss": 0.4558, "step": 8421 }, { "epoch": 1.07, "grad_norm": 0.6315090494745018, "learning_rate": 4.652120934686905e-06, "loss": 0.4817, "step": 8422 }, { "epoch": 1.07, "grad_norm": 0.730675884373918, "learning_rate": 4.651091855762157e-06, "loss": 0.5048, "step": 8423 }, { "epoch": 1.07, "grad_norm": 0.9512091654455435, "learning_rate": 4.650062791689313e-06, "loss": 0.4413, "step": 8424 }, { "epoch": 1.07, "grad_norm": 0.6870553837144314, "learning_rate": 4.649033742512178e-06, "loss": 0.5166, "step": 8425 }, { "epoch": 1.07, "grad_norm": 0.6898670375512296, "learning_rate": 4.648004708274553e-06, "loss": 0.501, "step": 8426 }, { "epoch": 1.07, "grad_norm": 0.6166967528623053, "learning_rate": 4.646975689020244e-06, "loss": 0.4447, "step": 8427 }, { "epoch": 1.07, "grad_norm": 0.7622041988668076, "learning_rate": 4.645946684793053e-06, "loss": 0.5427, "step": 8428 }, { "epoch": 1.07, "grad_norm": 0.9512668191239007, "learning_rate": 4.64491769563678e-06, "loss": 0.5007, "step": 8429 }, { "epoch": 1.07, "grad_norm": 0.6550393821112371, "learning_rate": 4.643888721595226e-06, "loss": 0.4389, "step": 8430 }, { "epoch": 1.07, "grad_norm": 0.5790761205366833, "learning_rate": 4.64285976271219e-06, "loss": 0.4697, "step": 8431 }, { "epoch": 1.07, "grad_norm": 0.7332266616339383, "learning_rate": 4.6418308190314735e-06, "loss": 0.5028, "step": 8432 }, { "epoch": 1.07, "grad_norm": 0.7020137781479109, "learning_rate": 4.640801890596875e-06, "loss": 0.4921, "step": 8433 }, { "epoch": 1.07, "grad_norm": 0.8762488343565392, "learning_rate": 4.639772977452192e-06, "loss": 0.5679, "step": 8434 }, { "epoch": 1.07, "grad_norm": 0.6958711154939297, "learning_rate": 4.638744079641222e-06, "loss": 0.492, "step": 8435 }, { "epoch": 1.07, "grad_norm": 0.689114992115966, "learning_rate": 4.6377151972077616e-06, "loss": 0.4725, "step": 8436 }, { "epoch": 1.07, "grad_norm": 0.7472976433703624, "learning_rate": 4.636686330195608e-06, "loss": 0.5361, "step": 8437 }, { "epoch": 1.07, "grad_norm": 0.7625279445897821, "learning_rate": 4.635657478648554e-06, "loss": 0.5082, "step": 8438 }, { "epoch": 1.08, "grad_norm": 0.7107890464207818, "learning_rate": 4.6346286426104e-06, "loss": 0.5421, "step": 8439 }, { "epoch": 1.08, "grad_norm": 0.6283271478914435, "learning_rate": 4.633599822124936e-06, "loss": 0.4799, "step": 8440 }, { "epoch": 1.08, "grad_norm": 0.7179127541771869, "learning_rate": 4.632571017235958e-06, "loss": 0.4939, "step": 8441 }, { "epoch": 1.08, "grad_norm": 0.5929685599772619, "learning_rate": 4.631542227987256e-06, "loss": 0.4426, "step": 8442 }, { "epoch": 1.08, "grad_norm": 0.7619415505757918, "learning_rate": 4.630513454422625e-06, "loss": 0.4697, "step": 8443 }, { "epoch": 1.08, "grad_norm": 0.6145496912518909, "learning_rate": 4.629484696585855e-06, "loss": 0.4827, "step": 8444 }, { "epoch": 1.08, "grad_norm": 0.7917648199349726, "learning_rate": 4.628455954520737e-06, "loss": 0.5028, "step": 8445 }, { "epoch": 1.08, "grad_norm": 0.6429403948703459, "learning_rate": 4.627427228271063e-06, "loss": 0.4956, "step": 8446 }, { "epoch": 1.08, "grad_norm": 0.7043783357359542, "learning_rate": 4.626398517880621e-06, "loss": 0.4971, "step": 8447 }, { "epoch": 1.08, "grad_norm": 0.5603148106640026, "learning_rate": 4.6253698233932e-06, "loss": 0.4695, "step": 8448 }, { "epoch": 1.08, "grad_norm": 0.7000625029904758, "learning_rate": 4.624341144852589e-06, "loss": 0.4331, "step": 8449 }, { "epoch": 1.08, "grad_norm": 0.7235475734831217, "learning_rate": 4.623312482302574e-06, "loss": 0.4813, "step": 8450 }, { "epoch": 1.08, "grad_norm": 0.7164336906123124, "learning_rate": 4.622283835786942e-06, "loss": 0.4594, "step": 8451 }, { "epoch": 1.08, "grad_norm": 0.8073894237025039, "learning_rate": 4.621255205349482e-06, "loss": 0.569, "step": 8452 }, { "epoch": 1.08, "grad_norm": 0.8459963819415339, "learning_rate": 4.620226591033977e-06, "loss": 0.5264, "step": 8453 }, { "epoch": 1.08, "grad_norm": 0.7778030068358962, "learning_rate": 4.619197992884213e-06, "loss": 0.5397, "step": 8454 }, { "epoch": 1.08, "grad_norm": 0.5668433521425842, "learning_rate": 4.618169410943973e-06, "loss": 0.4623, "step": 8455 }, { "epoch": 1.08, "grad_norm": 0.6018822436727288, "learning_rate": 4.61714084525704e-06, "loss": 0.4821, "step": 8456 }, { "epoch": 1.08, "grad_norm": 0.6914846077201479, "learning_rate": 4.616112295867199e-06, "loss": 0.4795, "step": 8457 }, { "epoch": 1.08, "grad_norm": 0.626801691390319, "learning_rate": 4.615083762818231e-06, "loss": 0.4504, "step": 8458 }, { "epoch": 1.08, "grad_norm": 0.5912242521165715, "learning_rate": 4.614055246153916e-06, "loss": 0.4401, "step": 8459 }, { "epoch": 1.08, "grad_norm": 0.6375960596102452, "learning_rate": 4.613026745918037e-06, "loss": 0.4583, "step": 8460 }, { "epoch": 1.08, "grad_norm": 0.7199161450730038, "learning_rate": 4.611998262154373e-06, "loss": 0.4799, "step": 8461 }, { "epoch": 1.08, "grad_norm": 0.7765032892839526, "learning_rate": 4.610969794906703e-06, "loss": 0.5635, "step": 8462 }, { "epoch": 1.08, "grad_norm": 0.7003572808161469, "learning_rate": 4.609941344218804e-06, "loss": 0.5241, "step": 8463 }, { "epoch": 1.08, "grad_norm": 0.7201438386129125, "learning_rate": 4.608912910134457e-06, "loss": 0.5248, "step": 8464 }, { "epoch": 1.08, "grad_norm": 0.631313299824022, "learning_rate": 4.607884492697437e-06, "loss": 0.4669, "step": 8465 }, { "epoch": 1.08, "grad_norm": 0.8139530153427013, "learning_rate": 4.606856091951523e-06, "loss": 0.489, "step": 8466 }, { "epoch": 1.08, "grad_norm": 0.5879956968665803, "learning_rate": 4.605827707940488e-06, "loss": 0.4634, "step": 8467 }, { "epoch": 1.08, "grad_norm": 0.6685052116960838, "learning_rate": 4.604799340708107e-06, "loss": 0.541, "step": 8468 }, { "epoch": 1.08, "grad_norm": 0.7104296776717307, "learning_rate": 4.6037709902981555e-06, "loss": 0.474, "step": 8469 }, { "epoch": 1.08, "grad_norm": 0.6014689511117821, "learning_rate": 4.602742656754407e-06, "loss": 0.4802, "step": 8470 }, { "epoch": 1.08, "grad_norm": 0.6157666300039946, "learning_rate": 4.601714340120634e-06, "loss": 0.4722, "step": 8471 }, { "epoch": 1.08, "grad_norm": 0.656486953096236, "learning_rate": 4.600686040440609e-06, "loss": 0.4995, "step": 8472 }, { "epoch": 1.08, "grad_norm": 0.7721062084650561, "learning_rate": 4.599657757758103e-06, "loss": 0.5929, "step": 8473 }, { "epoch": 1.08, "grad_norm": 0.6894633827978588, "learning_rate": 4.598629492116887e-06, "loss": 0.4997, "step": 8474 }, { "epoch": 1.08, "grad_norm": 0.8119950004487392, "learning_rate": 4.597601243560731e-06, "loss": 0.5158, "step": 8475 }, { "epoch": 1.08, "grad_norm": 0.7332548052297658, "learning_rate": 4.596573012133403e-06, "loss": 0.5482, "step": 8476 }, { "epoch": 1.08, "grad_norm": 0.6992240521993527, "learning_rate": 4.595544797878673e-06, "loss": 0.4908, "step": 8477 }, { "epoch": 1.08, "grad_norm": 0.7172619869783801, "learning_rate": 4.59451660084031e-06, "loss": 0.4776, "step": 8478 }, { "epoch": 1.08, "grad_norm": 0.5937967832691354, "learning_rate": 4.593488421062079e-06, "loss": 0.4863, "step": 8479 }, { "epoch": 1.08, "grad_norm": 0.7490510484117048, "learning_rate": 4.592460258587746e-06, "loss": 0.4768, "step": 8480 }, { "epoch": 1.08, "grad_norm": 0.6532429268903567, "learning_rate": 4.591432113461079e-06, "loss": 0.447, "step": 8481 }, { "epoch": 1.08, "grad_norm": 0.6112587121051494, "learning_rate": 4.59040398572584e-06, "loss": 0.4357, "step": 8482 }, { "epoch": 1.08, "grad_norm": 0.7077063572588193, "learning_rate": 4.589375875425795e-06, "loss": 0.4799, "step": 8483 }, { "epoch": 1.08, "grad_norm": 0.6327200742886288, "learning_rate": 4.5883477826047075e-06, "loss": 0.4513, "step": 8484 }, { "epoch": 1.08, "grad_norm": 0.6118384887184034, "learning_rate": 4.5873197073063385e-06, "loss": 0.428, "step": 8485 }, { "epoch": 1.08, "grad_norm": 0.6382769090767949, "learning_rate": 4.586291649574451e-06, "loss": 0.4515, "step": 8486 }, { "epoch": 1.08, "grad_norm": 0.63640148937763, "learning_rate": 4.585263609452807e-06, "loss": 0.5084, "step": 8487 }, { "epoch": 1.08, "grad_norm": 1.6005979336395098, "learning_rate": 4.584235586985162e-06, "loss": 0.5446, "step": 8488 }, { "epoch": 1.08, "grad_norm": 0.7239166760827016, "learning_rate": 4.583207582215283e-06, "loss": 0.5194, "step": 8489 }, { "epoch": 1.08, "grad_norm": 0.6976519505597664, "learning_rate": 4.582179595186925e-06, "loss": 0.5144, "step": 8490 }, { "epoch": 1.08, "grad_norm": 0.8564321157553273, "learning_rate": 4.581151625943846e-06, "loss": 0.5568, "step": 8491 }, { "epoch": 1.08, "grad_norm": 1.019726722347934, "learning_rate": 4.5801236745298035e-06, "loss": 0.5156, "step": 8492 }, { "epoch": 1.08, "grad_norm": 0.7631858368641522, "learning_rate": 4.579095740988555e-06, "loss": 0.5364, "step": 8493 }, { "epoch": 1.08, "grad_norm": 0.7451487220324169, "learning_rate": 4.5780678253638565e-06, "loss": 0.5113, "step": 8494 }, { "epoch": 1.08, "grad_norm": 0.5332420172698064, "learning_rate": 4.577039927699461e-06, "loss": 0.3696, "step": 8495 }, { "epoch": 1.08, "grad_norm": 0.7023011347967351, "learning_rate": 4.576012048039126e-06, "loss": 0.445, "step": 8496 }, { "epoch": 1.08, "grad_norm": 0.5533543310108555, "learning_rate": 4.574984186426602e-06, "loss": 0.4346, "step": 8497 }, { "epoch": 1.08, "grad_norm": 0.5938299622887316, "learning_rate": 4.573956342905643e-06, "loss": 0.4728, "step": 8498 }, { "epoch": 1.08, "grad_norm": 0.7108504577666083, "learning_rate": 4.572928517520001e-06, "loss": 0.5086, "step": 8499 }, { "epoch": 1.08, "grad_norm": 0.7453909158545868, "learning_rate": 4.5719007103134255e-06, "loss": 0.5116, "step": 8500 }, { "epoch": 1.08, "grad_norm": 1.1696602653173944, "learning_rate": 4.570872921329671e-06, "loss": 0.5146, "step": 8501 }, { "epoch": 1.08, "grad_norm": 2.651065938209936, "learning_rate": 4.569845150612485e-06, "loss": 0.5385, "step": 8502 }, { "epoch": 1.08, "grad_norm": 0.783421636126263, "learning_rate": 4.5688173982056155e-06, "loss": 0.4935, "step": 8503 }, { "epoch": 1.08, "grad_norm": 0.6527657778411697, "learning_rate": 4.567789664152812e-06, "loss": 0.5161, "step": 8504 }, { "epoch": 1.08, "grad_norm": 0.7547590276723879, "learning_rate": 4.566761948497821e-06, "loss": 0.4957, "step": 8505 }, { "epoch": 1.08, "grad_norm": 0.6484318284195962, "learning_rate": 4.5657342512843905e-06, "loss": 0.5079, "step": 8506 }, { "epoch": 1.08, "grad_norm": 0.6499076906790505, "learning_rate": 4.5647065725562646e-06, "loss": 0.4986, "step": 8507 }, { "epoch": 1.08, "grad_norm": 0.6062918986988345, "learning_rate": 4.563678912357189e-06, "loss": 0.4167, "step": 8508 }, { "epoch": 1.08, "grad_norm": 0.6235554844187612, "learning_rate": 4.562651270730907e-06, "loss": 0.4319, "step": 8509 }, { "epoch": 1.08, "grad_norm": 0.6540367638124763, "learning_rate": 4.561623647721164e-06, "loss": 0.4961, "step": 8510 }, { "epoch": 1.08, "grad_norm": 12.354005505526635, "learning_rate": 4.560596043371701e-06, "loss": 0.57, "step": 8511 }, { "epoch": 1.08, "grad_norm": 0.6988677283713708, "learning_rate": 4.55956845772626e-06, "loss": 0.504, "step": 8512 }, { "epoch": 1.08, "grad_norm": 0.7861868834231205, "learning_rate": 4.5585408908285816e-06, "loss": 0.5371, "step": 8513 }, { "epoch": 1.08, "grad_norm": 0.8069722489047302, "learning_rate": 4.557513342722408e-06, "loss": 0.5275, "step": 8514 }, { "epoch": 1.08, "grad_norm": 0.6365879697457588, "learning_rate": 4.5564858134514775e-06, "loss": 0.4497, "step": 8515 }, { "epoch": 1.08, "grad_norm": 0.6273633559367239, "learning_rate": 4.555458303059528e-06, "loss": 0.4146, "step": 8516 }, { "epoch": 1.09, "grad_norm": 0.8136841886984113, "learning_rate": 4.554430811590298e-06, "loss": 0.5138, "step": 8517 }, { "epoch": 1.09, "grad_norm": 0.8264935032692735, "learning_rate": 4.553403339087525e-06, "loss": 0.487, "step": 8518 }, { "epoch": 1.09, "grad_norm": 1.121747488940747, "learning_rate": 4.5523758855949436e-06, "loss": 0.4778, "step": 8519 }, { "epoch": 1.09, "grad_norm": 0.700967586858247, "learning_rate": 4.551348451156291e-06, "loss": 0.492, "step": 8520 }, { "epoch": 1.09, "grad_norm": 0.63724114384012, "learning_rate": 4.550321035815302e-06, "loss": 0.4543, "step": 8521 }, { "epoch": 1.09, "grad_norm": 0.6270102790803425, "learning_rate": 4.549293639615709e-06, "loss": 0.4213, "step": 8522 }, { "epoch": 1.09, "grad_norm": 0.582054691314822, "learning_rate": 4.548266262601245e-06, "loss": 0.4826, "step": 8523 }, { "epoch": 1.09, "grad_norm": 0.6218453651697858, "learning_rate": 4.5472389048156435e-06, "loss": 0.4312, "step": 8524 }, { "epoch": 1.09, "grad_norm": 0.6673240556469603, "learning_rate": 4.5462115663026334e-06, "loss": 0.466, "step": 8525 }, { "epoch": 1.09, "grad_norm": 0.7173811025798682, "learning_rate": 4.545184247105948e-06, "loss": 0.5349, "step": 8526 }, { "epoch": 1.09, "grad_norm": 0.7503162537633045, "learning_rate": 4.544156947269316e-06, "loss": 0.5353, "step": 8527 }, { "epoch": 1.09, "grad_norm": 0.6175185389318006, "learning_rate": 4.543129666836467e-06, "loss": 0.4338, "step": 8528 }, { "epoch": 1.09, "grad_norm": 0.5703059141701459, "learning_rate": 4.542102405851127e-06, "loss": 0.4122, "step": 8529 }, { "epoch": 1.09, "grad_norm": 0.6690488920865705, "learning_rate": 4.541075164357024e-06, "loss": 0.4688, "step": 8530 }, { "epoch": 1.09, "grad_norm": 1.0316807498732905, "learning_rate": 4.5400479423978855e-06, "loss": 0.4864, "step": 8531 }, { "epoch": 1.09, "grad_norm": 3.04091603041933, "learning_rate": 4.539020740017436e-06, "loss": 0.5029, "step": 8532 }, { "epoch": 1.09, "grad_norm": 0.6447024650700228, "learning_rate": 4.5379935572594e-06, "loss": 0.4826, "step": 8533 }, { "epoch": 1.09, "grad_norm": 0.7603019123434447, "learning_rate": 4.536966394167503e-06, "loss": 0.5101, "step": 8534 }, { "epoch": 1.09, "grad_norm": 0.5531883537578846, "learning_rate": 4.535939250785466e-06, "loss": 0.3976, "step": 8535 }, { "epoch": 1.09, "grad_norm": 0.5895226896128738, "learning_rate": 4.534912127157013e-06, "loss": 0.4469, "step": 8536 }, { "epoch": 1.09, "grad_norm": 0.8337964471556024, "learning_rate": 4.533885023325864e-06, "loss": 0.5188, "step": 8537 }, { "epoch": 1.09, "grad_norm": 0.796924810287293, "learning_rate": 4.532857939335738e-06, "loss": 0.5579, "step": 8538 }, { "epoch": 1.09, "grad_norm": 0.7125114184666226, "learning_rate": 4.531830875230359e-06, "loss": 0.5255, "step": 8539 }, { "epoch": 1.09, "grad_norm": 0.7751007418744865, "learning_rate": 4.5308038310534435e-06, "loss": 0.512, "step": 8540 }, { "epoch": 1.09, "grad_norm": 0.5971475669635828, "learning_rate": 4.529776806848709e-06, "loss": 0.4673, "step": 8541 }, { "epoch": 1.09, "grad_norm": 0.7450481095365744, "learning_rate": 4.528749802659875e-06, "loss": 0.5264, "step": 8542 }, { "epoch": 1.09, "grad_norm": 0.6659255352488508, "learning_rate": 4.5277228185306545e-06, "loss": 0.4933, "step": 8543 }, { "epoch": 1.09, "grad_norm": 0.629523510727235, "learning_rate": 4.5266958545047645e-06, "loss": 0.4729, "step": 8544 }, { "epoch": 1.09, "grad_norm": 0.8119753801888152, "learning_rate": 4.52566891062592e-06, "loss": 0.4691, "step": 8545 }, { "epoch": 1.09, "grad_norm": 0.6606991694064694, "learning_rate": 4.5246419869378335e-06, "loss": 0.4831, "step": 8546 }, { "epoch": 1.09, "grad_norm": 0.7158881713966089, "learning_rate": 4.52361508348422e-06, "loss": 0.5312, "step": 8547 }, { "epoch": 1.09, "grad_norm": 1.2598959055127832, "learning_rate": 4.522588200308789e-06, "loss": 0.5205, "step": 8548 }, { "epoch": 1.09, "grad_norm": 1.4816377323870071, "learning_rate": 4.521561337455252e-06, "loss": 0.5223, "step": 8549 }, { "epoch": 1.09, "grad_norm": 0.6636007298329161, "learning_rate": 4.520534494967319e-06, "loss": 0.4837, "step": 8550 }, { "epoch": 1.09, "grad_norm": 0.974156257685153, "learning_rate": 4.519507672888703e-06, "loss": 0.5202, "step": 8551 }, { "epoch": 1.09, "grad_norm": 0.6543097079647269, "learning_rate": 4.518480871263109e-06, "loss": 0.4973, "step": 8552 }, { "epoch": 1.09, "grad_norm": 0.7239561621348768, "learning_rate": 4.517454090134246e-06, "loss": 0.4602, "step": 8553 }, { "epoch": 1.09, "grad_norm": 0.6288596350119882, "learning_rate": 4.5164273295458196e-06, "loss": 0.4684, "step": 8554 }, { "epoch": 1.09, "grad_norm": 0.596747871792202, "learning_rate": 4.515400589541537e-06, "loss": 0.4458, "step": 8555 }, { "epoch": 1.09, "grad_norm": 0.6388115211221093, "learning_rate": 4.5143738701651035e-06, "loss": 0.453, "step": 8556 }, { "epoch": 1.09, "grad_norm": 0.8543492795767097, "learning_rate": 4.5133471714602215e-06, "loss": 0.5549, "step": 8557 }, { "epoch": 1.09, "grad_norm": 0.910159438413612, "learning_rate": 4.512320493470596e-06, "loss": 0.4789, "step": 8558 }, { "epoch": 1.09, "grad_norm": 0.5737940554333593, "learning_rate": 4.5112938362399284e-06, "loss": 0.4676, "step": 8559 }, { "epoch": 1.09, "grad_norm": 0.7892243399036161, "learning_rate": 4.51026719981192e-06, "loss": 0.5219, "step": 8560 }, { "epoch": 1.09, "grad_norm": 0.6176669981869405, "learning_rate": 4.5092405842302725e-06, "loss": 0.4941, "step": 8561 }, { "epoch": 1.09, "grad_norm": 0.6739875231464935, "learning_rate": 4.508213989538683e-06, "loss": 0.4539, "step": 8562 }, { "epoch": 1.09, "grad_norm": 0.7454513425658681, "learning_rate": 4.507187415780855e-06, "loss": 0.5714, "step": 8563 }, { "epoch": 1.09, "grad_norm": 1.5481377961594522, "learning_rate": 4.506160863000483e-06, "loss": 0.5671, "step": 8564 }, { "epoch": 1.09, "grad_norm": 0.687251456902005, "learning_rate": 4.505134331241265e-06, "loss": 0.4877, "step": 8565 }, { "epoch": 1.09, "grad_norm": 0.7327951515727712, "learning_rate": 4.504107820546898e-06, "loss": 0.4892, "step": 8566 }, { "epoch": 1.09, "grad_norm": 0.7368247642955208, "learning_rate": 4.503081330961076e-06, "loss": 0.5365, "step": 8567 }, { "epoch": 1.09, "grad_norm": 0.8655420527301664, "learning_rate": 4.502054862527494e-06, "loss": 0.5401, "step": 8568 }, { "epoch": 1.09, "grad_norm": 0.668336103426427, "learning_rate": 4.5010284152898445e-06, "loss": 0.487, "step": 8569 }, { "epoch": 1.09, "grad_norm": 0.6543585134052369, "learning_rate": 4.500001989291821e-06, "loss": 0.4561, "step": 8570 }, { "epoch": 1.09, "grad_norm": 0.616512254387168, "learning_rate": 4.498975584577116e-06, "loss": 0.4662, "step": 8571 }, { "epoch": 1.09, "grad_norm": 0.5417851188518977, "learning_rate": 4.497949201189418e-06, "loss": 0.4544, "step": 8572 }, { "epoch": 1.09, "grad_norm": 0.6382641415253936, "learning_rate": 4.49692283917242e-06, "loss": 0.4463, "step": 8573 }, { "epoch": 1.09, "grad_norm": 0.6068944434643876, "learning_rate": 4.495896498569807e-06, "loss": 0.4646, "step": 8574 }, { "epoch": 1.09, "grad_norm": 0.7079080744809629, "learning_rate": 4.49487017942527e-06, "loss": 0.4764, "step": 8575 }, { "epoch": 1.09, "grad_norm": 0.5734771413038712, "learning_rate": 4.493843881782495e-06, "loss": 0.4756, "step": 8576 }, { "epoch": 1.09, "grad_norm": 0.6236499671704739, "learning_rate": 4.492817605685169e-06, "loss": 0.4982, "step": 8577 }, { "epoch": 1.09, "grad_norm": 0.9465062605165874, "learning_rate": 4.491791351176977e-06, "loss": 0.5133, "step": 8578 }, { "epoch": 1.09, "grad_norm": 0.6049141820933159, "learning_rate": 4.490765118301603e-06, "loss": 0.4948, "step": 8579 }, { "epoch": 1.09, "grad_norm": 0.7607790457656651, "learning_rate": 4.48973890710273e-06, "loss": 0.4705, "step": 8580 }, { "epoch": 1.09, "grad_norm": 0.629780419140997, "learning_rate": 4.4887127176240415e-06, "loss": 0.493, "step": 8581 }, { "epoch": 1.09, "grad_norm": 0.6243714315478958, "learning_rate": 4.487686549909219e-06, "loss": 0.4483, "step": 8582 }, { "epoch": 1.09, "grad_norm": 0.6057055072199797, "learning_rate": 4.486660404001944e-06, "loss": 0.4305, "step": 8583 }, { "epoch": 1.09, "grad_norm": 0.6615294020976404, "learning_rate": 4.485634279945894e-06, "loss": 0.4765, "step": 8584 }, { "epoch": 1.09, "grad_norm": 0.6795424287282442, "learning_rate": 4.484608177784749e-06, "loss": 0.4713, "step": 8585 }, { "epoch": 1.09, "grad_norm": 0.5962094773294149, "learning_rate": 4.483582097562187e-06, "loss": 0.4787, "step": 8586 }, { "epoch": 1.09, "grad_norm": 0.7227475521486151, "learning_rate": 4.482556039321884e-06, "loss": 0.4962, "step": 8587 }, { "epoch": 1.09, "grad_norm": 0.6434688661805869, "learning_rate": 4.481530003107518e-06, "loss": 0.4598, "step": 8588 }, { "epoch": 1.09, "grad_norm": 0.5934801999275032, "learning_rate": 4.480503988962762e-06, "loss": 0.4845, "step": 8589 }, { "epoch": 1.09, "grad_norm": 0.753699827132346, "learning_rate": 4.479477996931293e-06, "loss": 0.57, "step": 8590 }, { "epoch": 1.09, "grad_norm": 0.7304870421957352, "learning_rate": 4.478452027056781e-06, "loss": 0.5929, "step": 8591 }, { "epoch": 1.09, "grad_norm": 0.8228141212036288, "learning_rate": 4.4774260793829e-06, "loss": 0.5648, "step": 8592 }, { "epoch": 1.09, "grad_norm": 0.5714437996989518, "learning_rate": 4.476400153953321e-06, "loss": 0.4417, "step": 8593 }, { "epoch": 1.09, "grad_norm": 0.6184048249245075, "learning_rate": 4.475374250811714e-06, "loss": 0.4568, "step": 8594 }, { "epoch": 1.09, "grad_norm": 0.772098798074029, "learning_rate": 4.474348370001749e-06, "loss": 0.5371, "step": 8595 }, { "epoch": 1.1, "grad_norm": 0.9233128394480612, "learning_rate": 4.473322511567094e-06, "loss": 0.518, "step": 8596 }, { "epoch": 1.1, "grad_norm": 0.7172785458518779, "learning_rate": 4.472296675551416e-06, "loss": 0.4532, "step": 8597 }, { "epoch": 1.1, "grad_norm": 0.7916605861801453, "learning_rate": 4.471270861998383e-06, "loss": 0.542, "step": 8598 }, { "epoch": 1.1, "grad_norm": 0.5829875274529767, "learning_rate": 4.4702450709516595e-06, "loss": 0.4628, "step": 8599 }, { "epoch": 1.1, "grad_norm": 0.7499182173210165, "learning_rate": 4.4692193024549095e-06, "loss": 0.4486, "step": 8600 }, { "epoch": 1.1, "grad_norm": 0.7169296975442507, "learning_rate": 4.468193556551798e-06, "loss": 0.4398, "step": 8601 }, { "epoch": 1.1, "grad_norm": 0.6425119022213079, "learning_rate": 4.467167833285988e-06, "loss": 0.4515, "step": 8602 }, { "epoch": 1.1, "grad_norm": 0.5763835112707627, "learning_rate": 4.466142132701141e-06, "loss": 0.4667, "step": 8603 }, { "epoch": 1.1, "grad_norm": 0.6296111599683203, "learning_rate": 4.465116454840918e-06, "loss": 0.5095, "step": 8604 }, { "epoch": 1.1, "grad_norm": 0.747651229603383, "learning_rate": 4.4640907997489765e-06, "loss": 0.5434, "step": 8605 }, { "epoch": 1.1, "grad_norm": 0.7213999981081614, "learning_rate": 4.4630651674689785e-06, "loss": 0.4558, "step": 8606 }, { "epoch": 1.1, "grad_norm": 0.7039531633040491, "learning_rate": 4.4620395580445806e-06, "loss": 0.48, "step": 8607 }, { "epoch": 1.1, "grad_norm": 0.5914858093262314, "learning_rate": 4.461013971519439e-06, "loss": 0.4814, "step": 8608 }, { "epoch": 1.1, "grad_norm": 0.7893659125193687, "learning_rate": 4.45998840793721e-06, "loss": 0.584, "step": 8609 }, { "epoch": 1.1, "grad_norm": 0.8114567157705281, "learning_rate": 4.45896286734155e-06, "loss": 0.4689, "step": 8610 }, { "epoch": 1.1, "grad_norm": 0.6929330862075997, "learning_rate": 4.457937349776111e-06, "loss": 0.4272, "step": 8611 }, { "epoch": 1.1, "grad_norm": 0.7801562241755182, "learning_rate": 4.456911855284545e-06, "loss": 0.4953, "step": 8612 }, { "epoch": 1.1, "grad_norm": 0.6751757870398382, "learning_rate": 4.455886383910507e-06, "loss": 0.5268, "step": 8613 }, { "epoch": 1.1, "grad_norm": 0.7973034526726619, "learning_rate": 4.454860935697648e-06, "loss": 0.5348, "step": 8614 }, { "epoch": 1.1, "grad_norm": 0.6372102847295429, "learning_rate": 4.453835510689617e-06, "loss": 0.4681, "step": 8615 }, { "epoch": 1.1, "grad_norm": 0.6916366072561504, "learning_rate": 4.452810108930063e-06, "loss": 0.5278, "step": 8616 }, { "epoch": 1.1, "grad_norm": 0.8292440576209625, "learning_rate": 4.451784730462634e-06, "loss": 0.5617, "step": 8617 }, { "epoch": 1.1, "grad_norm": 0.6891838474682641, "learning_rate": 4.450759375330977e-06, "loss": 0.5236, "step": 8618 }, { "epoch": 1.1, "grad_norm": 0.6294819532266165, "learning_rate": 4.4497340435787385e-06, "loss": 0.4739, "step": 8619 }, { "epoch": 1.1, "grad_norm": 0.6667224319215789, "learning_rate": 4.4487087352495625e-06, "loss": 0.475, "step": 8620 }, { "epoch": 1.1, "grad_norm": 0.6618942768878272, "learning_rate": 4.447683450387095e-06, "loss": 0.4744, "step": 8621 }, { "epoch": 1.1, "grad_norm": 0.7378393210761782, "learning_rate": 4.446658189034977e-06, "loss": 0.5222, "step": 8622 }, { "epoch": 1.1, "grad_norm": 0.6726329303074522, "learning_rate": 4.4456329512368525e-06, "loss": 0.4672, "step": 8623 }, { "epoch": 1.1, "grad_norm": 0.7723465303164209, "learning_rate": 4.44460773703636e-06, "loss": 0.5219, "step": 8624 }, { "epoch": 1.1, "grad_norm": 0.6935111887510977, "learning_rate": 4.443582546477143e-06, "loss": 0.4762, "step": 8625 }, { "epoch": 1.1, "grad_norm": 0.565089202783496, "learning_rate": 4.4425573796028385e-06, "loss": 0.4188, "step": 8626 }, { "epoch": 1.1, "grad_norm": 0.5791298537921312, "learning_rate": 4.4415322364570854e-06, "loss": 0.4689, "step": 8627 }, { "epoch": 1.1, "grad_norm": 0.6218219346859978, "learning_rate": 4.44050711708352e-06, "loss": 0.4506, "step": 8628 }, { "epoch": 1.1, "grad_norm": 0.6059194126133071, "learning_rate": 4.43948202152578e-06, "loss": 0.4636, "step": 8629 }, { "epoch": 1.1, "grad_norm": 0.5934545801783437, "learning_rate": 4.438456949827498e-06, "loss": 0.4531, "step": 8630 }, { "epoch": 1.1, "grad_norm": 0.6532553877571453, "learning_rate": 4.43743190203231e-06, "loss": 0.4845, "step": 8631 }, { "epoch": 1.1, "grad_norm": 0.7466083408297207, "learning_rate": 4.436406878183848e-06, "loss": 0.5414, "step": 8632 }, { "epoch": 1.1, "grad_norm": 0.7449060202951123, "learning_rate": 4.435381878325744e-06, "loss": 0.526, "step": 8633 }, { "epoch": 1.1, "grad_norm": 0.6527145089285332, "learning_rate": 4.434356902501629e-06, "loss": 0.5192, "step": 8634 }, { "epoch": 1.1, "grad_norm": 0.5614537143279134, "learning_rate": 4.433331950755133e-06, "loss": 0.4947, "step": 8635 }, { "epoch": 1.1, "grad_norm": 0.6354571424910672, "learning_rate": 4.432307023129885e-06, "loss": 0.4495, "step": 8636 }, { "epoch": 1.1, "grad_norm": 0.6750311823057951, "learning_rate": 4.431282119669513e-06, "loss": 0.4375, "step": 8637 }, { "epoch": 1.1, "grad_norm": 0.6404922351551362, "learning_rate": 4.430257240417644e-06, "loss": 0.4317, "step": 8638 }, { "epoch": 1.1, "grad_norm": 0.5444893108505425, "learning_rate": 4.429232385417903e-06, "loss": 0.3925, "step": 8639 }, { "epoch": 1.1, "grad_norm": 0.61761876384027, "learning_rate": 4.428207554713916e-06, "loss": 0.4815, "step": 8640 }, { "epoch": 1.1, "grad_norm": 0.7125928483156411, "learning_rate": 4.4271827483493065e-06, "loss": 0.5078, "step": 8641 }, { "epoch": 1.1, "grad_norm": 0.7224533202582047, "learning_rate": 4.4261579663676964e-06, "loss": 0.5342, "step": 8642 }, { "epoch": 1.1, "grad_norm": 0.8157519984574083, "learning_rate": 4.425133208812708e-06, "loss": 0.5587, "step": 8643 }, { "epoch": 1.1, "grad_norm": 0.9054439381780619, "learning_rate": 4.424108475727962e-06, "loss": 0.5551, "step": 8644 }, { "epoch": 1.1, "grad_norm": 0.6051133046357489, "learning_rate": 4.423083767157078e-06, "loss": 0.4632, "step": 8645 }, { "epoch": 1.1, "grad_norm": 0.6810378920543833, "learning_rate": 4.422059083143675e-06, "loss": 0.5001, "step": 8646 }, { "epoch": 1.1, "grad_norm": 0.8214147314192999, "learning_rate": 4.4210344237313695e-06, "loss": 0.5469, "step": 8647 }, { "epoch": 1.1, "grad_norm": 0.7436719018597728, "learning_rate": 4.420009788963779e-06, "loss": 0.5448, "step": 8648 }, { "epoch": 1.1, "grad_norm": 0.6598472872452492, "learning_rate": 4.4189851788845165e-06, "loss": 0.5042, "step": 8649 }, { "epoch": 1.1, "grad_norm": 0.9718468638325388, "learning_rate": 4.4179605935372e-06, "loss": 0.47, "step": 8650 }, { "epoch": 1.1, "grad_norm": 0.5969621057466895, "learning_rate": 4.41693603296544e-06, "loss": 0.453, "step": 8651 }, { "epoch": 1.1, "grad_norm": 0.5265999909546717, "learning_rate": 4.415911497212852e-06, "loss": 0.394, "step": 8652 }, { "epoch": 1.1, "grad_norm": 0.6303210903993633, "learning_rate": 4.414886986323043e-06, "loss": 0.4368, "step": 8653 }, { "epoch": 1.1, "grad_norm": 0.6939423926267622, "learning_rate": 4.4138625003396265e-06, "loss": 0.4152, "step": 8654 }, { "epoch": 1.1, "grad_norm": 0.626467544574878, "learning_rate": 4.41283803930621e-06, "loss": 0.5155, "step": 8655 }, { "epoch": 1.1, "grad_norm": 0.7689550372434832, "learning_rate": 4.411813603266401e-06, "loss": 0.4649, "step": 8656 }, { "epoch": 1.1, "grad_norm": 0.6866838720873619, "learning_rate": 4.410789192263808e-06, "loss": 0.526, "step": 8657 }, { "epoch": 1.1, "grad_norm": 0.8654830526462901, "learning_rate": 4.409764806342037e-06, "loss": 0.5443, "step": 8658 }, { "epoch": 1.1, "grad_norm": 0.5976793751090987, "learning_rate": 4.408740445544691e-06, "loss": 0.4873, "step": 8659 }, { "epoch": 1.1, "grad_norm": 0.5784852569203252, "learning_rate": 4.407716109915375e-06, "loss": 0.4481, "step": 8660 }, { "epoch": 1.1, "grad_norm": 0.6564506025264064, "learning_rate": 4.406691799497692e-06, "loss": 0.4941, "step": 8661 }, { "epoch": 1.1, "grad_norm": 0.7154700971699651, "learning_rate": 4.405667514335241e-06, "loss": 0.5411, "step": 8662 }, { "epoch": 1.1, "grad_norm": 0.6501901165115125, "learning_rate": 4.404643254471626e-06, "loss": 0.4738, "step": 8663 }, { "epoch": 1.1, "grad_norm": 0.6076375323665985, "learning_rate": 4.403619019950446e-06, "loss": 0.4509, "step": 8664 }, { "epoch": 1.1, "grad_norm": 0.6591207461291212, "learning_rate": 4.4025948108152985e-06, "loss": 0.4859, "step": 8665 }, { "epoch": 1.1, "grad_norm": 0.6272720824264322, "learning_rate": 4.401570627109781e-06, "loss": 0.5301, "step": 8666 }, { "epoch": 1.1, "grad_norm": 0.6913948497572918, "learning_rate": 4.400546468877489e-06, "loss": 0.4798, "step": 8667 }, { "epoch": 1.1, "grad_norm": 0.6434516985941561, "learning_rate": 4.3995223361620185e-06, "loss": 0.4689, "step": 8668 }, { "epoch": 1.1, "grad_norm": 0.5876805938368626, "learning_rate": 4.398498229006963e-06, "loss": 0.4696, "step": 8669 }, { "epoch": 1.1, "grad_norm": 0.6057948307165987, "learning_rate": 4.3974741474559165e-06, "loss": 0.4372, "step": 8670 }, { "epoch": 1.1, "grad_norm": 0.6715480883261464, "learning_rate": 4.39645009155247e-06, "loss": 0.456, "step": 8671 }, { "epoch": 1.1, "grad_norm": 0.7285974130030384, "learning_rate": 4.395426061340214e-06, "loss": 0.5578, "step": 8672 }, { "epoch": 1.1, "grad_norm": 0.6896630406295814, "learning_rate": 4.394402056862739e-06, "loss": 0.4578, "step": 8673 }, { "epoch": 1.11, "grad_norm": 0.6618818964229133, "learning_rate": 4.39337807816363e-06, "loss": 0.4832, "step": 8674 }, { "epoch": 1.11, "grad_norm": 0.7675535119380845, "learning_rate": 4.392354125286482e-06, "loss": 0.5144, "step": 8675 }, { "epoch": 1.11, "grad_norm": 0.8639253036233522, "learning_rate": 4.391330198274875e-06, "loss": 0.4811, "step": 8676 }, { "epoch": 1.11, "grad_norm": 0.6290323980018491, "learning_rate": 4.390306297172398e-06, "loss": 0.4495, "step": 8677 }, { "epoch": 1.11, "grad_norm": 0.5589253284305435, "learning_rate": 4.389282422022632e-06, "loss": 0.4362, "step": 8678 }, { "epoch": 1.11, "grad_norm": 0.6243316474311928, "learning_rate": 4.388258572869163e-06, "loss": 0.4681, "step": 8679 }, { "epoch": 1.11, "grad_norm": 0.7325730737097648, "learning_rate": 4.387234749755572e-06, "loss": 0.486, "step": 8680 }, { "epoch": 1.11, "grad_norm": 0.6590763989928909, "learning_rate": 4.386210952725438e-06, "loss": 0.4609, "step": 8681 }, { "epoch": 1.11, "grad_norm": 0.6050061783622741, "learning_rate": 4.3851871818223436e-06, "loss": 0.4959, "step": 8682 }, { "epoch": 1.11, "grad_norm": 0.8861113074397442, "learning_rate": 4.3841634370898665e-06, "loss": 0.5099, "step": 8683 }, { "epoch": 1.11, "grad_norm": 0.6315380546199132, "learning_rate": 4.383139718571583e-06, "loss": 0.4538, "step": 8684 }, { "epoch": 1.11, "grad_norm": 0.7651877298522781, "learning_rate": 4.3821160263110706e-06, "loss": 0.5382, "step": 8685 }, { "epoch": 1.11, "grad_norm": 0.8077359680707338, "learning_rate": 4.381092360351903e-06, "loss": 0.5179, "step": 8686 }, { "epoch": 1.11, "grad_norm": 0.6692613847095965, "learning_rate": 4.380068720737657e-06, "loss": 0.4142, "step": 8687 }, { "epoch": 1.11, "grad_norm": 0.6390244607802591, "learning_rate": 4.379045107511905e-06, "loss": 0.4423, "step": 8688 }, { "epoch": 1.11, "grad_norm": 0.5724714658015041, "learning_rate": 4.378021520718219e-06, "loss": 0.4826, "step": 8689 }, { "epoch": 1.11, "grad_norm": 0.6262749841077924, "learning_rate": 4.376997960400169e-06, "loss": 0.472, "step": 8690 }, { "epoch": 1.11, "grad_norm": 0.6323066172468694, "learning_rate": 4.375974426601325e-06, "loss": 0.4979, "step": 8691 }, { "epoch": 1.11, "grad_norm": 0.7704174814894261, "learning_rate": 4.374950919365256e-06, "loss": 0.5666, "step": 8692 }, { "epoch": 1.11, "grad_norm": 0.6165845872604485, "learning_rate": 4.373927438735528e-06, "loss": 0.4665, "step": 8693 }, { "epoch": 1.11, "grad_norm": 0.7284894634413241, "learning_rate": 4.3729039847557085e-06, "loss": 0.4949, "step": 8694 }, { "epoch": 1.11, "grad_norm": 0.7551244400144178, "learning_rate": 4.371880557469363e-06, "loss": 0.5939, "step": 8695 }, { "epoch": 1.11, "grad_norm": 0.7800292378269323, "learning_rate": 4.370857156920055e-06, "loss": 0.5347, "step": 8696 }, { "epoch": 1.11, "grad_norm": 0.7780927129731123, "learning_rate": 4.369833783151347e-06, "loss": 0.5968, "step": 8697 }, { "epoch": 1.11, "grad_norm": 0.766192995022262, "learning_rate": 4.368810436206801e-06, "loss": 0.5426, "step": 8698 }, { "epoch": 1.11, "grad_norm": 0.818234690258935, "learning_rate": 4.3677871161299766e-06, "loss": 0.5855, "step": 8699 }, { "epoch": 1.11, "grad_norm": 0.7936641865945461, "learning_rate": 4.366763822964436e-06, "loss": 0.5746, "step": 8700 }, { "epoch": 1.11, "grad_norm": 0.7858493847198313, "learning_rate": 4.365740556753736e-06, "loss": 0.531, "step": 8701 }, { "epoch": 1.11, "grad_norm": 0.6234169750185313, "learning_rate": 4.364717317541433e-06, "loss": 0.4522, "step": 8702 }, { "epoch": 1.11, "grad_norm": 0.5651259641054174, "learning_rate": 4.363694105371085e-06, "loss": 0.4544, "step": 8703 }, { "epoch": 1.11, "grad_norm": 0.714957264872062, "learning_rate": 4.362670920286243e-06, "loss": 0.4892, "step": 8704 }, { "epoch": 1.11, "grad_norm": 0.6221203498088445, "learning_rate": 4.361647762330465e-06, "loss": 0.4668, "step": 8705 }, { "epoch": 1.11, "grad_norm": 0.8096269206852756, "learning_rate": 4.360624631547301e-06, "loss": 0.4826, "step": 8706 }, { "epoch": 1.11, "grad_norm": 0.7454281668502041, "learning_rate": 4.359601527980305e-06, "loss": 0.4689, "step": 8707 }, { "epoch": 1.11, "grad_norm": 0.623672530163333, "learning_rate": 4.358578451673024e-06, "loss": 0.4782, "step": 8708 }, { "epoch": 1.11, "grad_norm": 0.6512129741626779, "learning_rate": 4.35755540266901e-06, "loss": 0.4442, "step": 8709 }, { "epoch": 1.11, "grad_norm": 0.6338054233723615, "learning_rate": 4.356532381011809e-06, "loss": 0.444, "step": 8710 }, { "epoch": 1.11, "grad_norm": 0.6410156350100498, "learning_rate": 4.355509386744967e-06, "loss": 0.4419, "step": 8711 }, { "epoch": 1.11, "grad_norm": 0.8272582864463619, "learning_rate": 4.354486419912032e-06, "loss": 0.555, "step": 8712 }, { "epoch": 1.11, "grad_norm": 1.0136044771380013, "learning_rate": 4.353463480556548e-06, "loss": 0.5477, "step": 8713 }, { "epoch": 1.11, "grad_norm": 0.6316251371016259, "learning_rate": 4.3524405687220564e-06, "loss": 0.457, "step": 8714 }, { "epoch": 1.11, "grad_norm": 0.5959454485535449, "learning_rate": 4.351417684452101e-06, "loss": 0.4442, "step": 8715 }, { "epoch": 1.11, "grad_norm": 0.7357473253675132, "learning_rate": 4.350394827790222e-06, "loss": 0.4624, "step": 8716 }, { "epoch": 1.11, "grad_norm": 0.708131638468915, "learning_rate": 4.3493719987799595e-06, "loss": 0.494, "step": 8717 }, { "epoch": 1.11, "grad_norm": 0.7470162034134946, "learning_rate": 4.3483491974648525e-06, "loss": 0.5011, "step": 8718 }, { "epoch": 1.11, "grad_norm": 0.7863145203388021, "learning_rate": 4.347326423888438e-06, "loss": 0.5582, "step": 8719 }, { "epoch": 1.11, "grad_norm": 0.7020088784737551, "learning_rate": 4.34630367809425e-06, "loss": 0.4847, "step": 8720 }, { "epoch": 1.11, "grad_norm": 0.6526452735990081, "learning_rate": 4.345280960125828e-06, "loss": 0.5078, "step": 8721 }, { "epoch": 1.11, "grad_norm": 0.601958441186249, "learning_rate": 4.344258270026702e-06, "loss": 0.5041, "step": 8722 }, { "epoch": 1.11, "grad_norm": 0.6923037317230399, "learning_rate": 4.343235607840406e-06, "loss": 0.5121, "step": 8723 }, { "epoch": 1.11, "grad_norm": 0.5976230137798745, "learning_rate": 4.342212973610469e-06, "loss": 0.4259, "step": 8724 }, { "epoch": 1.11, "grad_norm": 0.7412289909548726, "learning_rate": 4.3411903673804266e-06, "loss": 0.4473, "step": 8725 }, { "epoch": 1.11, "grad_norm": 0.684473919796735, "learning_rate": 4.340167789193805e-06, "loss": 0.5068, "step": 8726 }, { "epoch": 1.11, "grad_norm": 0.6539044304974383, "learning_rate": 4.339145239094132e-06, "loss": 0.4859, "step": 8727 }, { "epoch": 1.11, "grad_norm": 0.8439199576465645, "learning_rate": 4.338122717124934e-06, "loss": 0.5716, "step": 8728 }, { "epoch": 1.11, "grad_norm": 0.6855460364648731, "learning_rate": 4.337100223329737e-06, "loss": 0.5111, "step": 8729 }, { "epoch": 1.11, "grad_norm": 0.6677128874231376, "learning_rate": 4.336077757752066e-06, "loss": 0.5386, "step": 8730 }, { "epoch": 1.11, "grad_norm": 0.8486124846884892, "learning_rate": 4.335055320435442e-06, "loss": 0.5168, "step": 8731 }, { "epoch": 1.11, "grad_norm": 0.8376220912232718, "learning_rate": 4.3340329114233886e-06, "loss": 0.4783, "step": 8732 }, { "epoch": 1.11, "grad_norm": 0.6469251931162432, "learning_rate": 4.333010530759426e-06, "loss": 0.4524, "step": 8733 }, { "epoch": 1.11, "grad_norm": 0.5702683842430247, "learning_rate": 4.331988178487074e-06, "loss": 0.4402, "step": 8734 }, { "epoch": 1.11, "grad_norm": 0.6489907106342068, "learning_rate": 4.330965854649851e-06, "loss": 0.4388, "step": 8735 }, { "epoch": 1.11, "grad_norm": 0.6926037390772614, "learning_rate": 4.32994355929127e-06, "loss": 0.4754, "step": 8736 }, { "epoch": 1.11, "grad_norm": 0.8381220742630461, "learning_rate": 4.328921292454854e-06, "loss": 0.5717, "step": 8737 }, { "epoch": 1.11, "grad_norm": 0.6811558963561367, "learning_rate": 4.3278990541841135e-06, "loss": 0.5004, "step": 8738 }, { "epoch": 1.11, "grad_norm": 0.8772869906540514, "learning_rate": 4.326876844522563e-06, "loss": 0.5307, "step": 8739 }, { "epoch": 1.11, "grad_norm": 0.6755541977106135, "learning_rate": 4.3258546635137135e-06, "loss": 0.5776, "step": 8740 }, { "epoch": 1.11, "grad_norm": 0.7146363999171752, "learning_rate": 4.3248325112010775e-06, "loss": 0.5548, "step": 8741 }, { "epoch": 1.11, "grad_norm": 0.7573293702064998, "learning_rate": 4.3238103876281635e-06, "loss": 0.5409, "step": 8742 }, { "epoch": 1.11, "grad_norm": 0.7290973537859432, "learning_rate": 4.32278829283848e-06, "loss": 0.5383, "step": 8743 }, { "epoch": 1.11, "grad_norm": 1.4555408990939345, "learning_rate": 4.321766226875536e-06, "loss": 0.4785, "step": 8744 }, { "epoch": 1.11, "grad_norm": 0.788367839865495, "learning_rate": 4.320744189782836e-06, "loss": 0.5144, "step": 8745 }, { "epoch": 1.11, "grad_norm": 0.58756658494428, "learning_rate": 4.319722181603885e-06, "loss": 0.4537, "step": 8746 }, { "epoch": 1.11, "grad_norm": 0.7559035060987543, "learning_rate": 4.3187002023821875e-06, "loss": 0.4853, "step": 8747 }, { "epoch": 1.11, "grad_norm": 0.8002218078278686, "learning_rate": 4.317678252161242e-06, "loss": 0.5377, "step": 8748 }, { "epoch": 1.11, "grad_norm": 0.6308985445671924, "learning_rate": 4.316656330984556e-06, "loss": 0.5428, "step": 8749 }, { "epoch": 1.11, "grad_norm": 0.7165376891723766, "learning_rate": 4.315634438895626e-06, "loss": 0.4991, "step": 8750 }, { "epoch": 1.11, "grad_norm": 0.762359985958814, "learning_rate": 4.314612575937951e-06, "loss": 0.5207, "step": 8751 }, { "epoch": 1.11, "grad_norm": 0.727014580873897, "learning_rate": 4.313590742155028e-06, "loss": 0.4511, "step": 8752 }, { "epoch": 1.12, "grad_norm": 0.8891728874339988, "learning_rate": 4.312568937590354e-06, "loss": 0.5493, "step": 8753 }, { "epoch": 1.12, "grad_norm": 0.7236852647320612, "learning_rate": 4.311547162287424e-06, "loss": 0.4813, "step": 8754 }, { "epoch": 1.12, "grad_norm": 0.7870785375394218, "learning_rate": 4.3105254162897305e-06, "loss": 0.5121, "step": 8755 }, { "epoch": 1.12, "grad_norm": 0.6218075671455104, "learning_rate": 4.309503699640768e-06, "loss": 0.473, "step": 8756 }, { "epoch": 1.12, "grad_norm": 0.7602237625412849, "learning_rate": 4.308482012384025e-06, "loss": 0.5028, "step": 8757 }, { "epoch": 1.12, "grad_norm": 0.754321941869743, "learning_rate": 4.307460354562995e-06, "loss": 0.4829, "step": 8758 }, { "epoch": 1.12, "grad_norm": 0.5843259666928085, "learning_rate": 4.306438726221163e-06, "loss": 0.4733, "step": 8759 }, { "epoch": 1.12, "grad_norm": 0.6226233407243121, "learning_rate": 4.305417127402018e-06, "loss": 0.4648, "step": 8760 }, { "epoch": 1.12, "grad_norm": 0.5072978089465365, "learning_rate": 4.304395558149046e-06, "loss": 0.3804, "step": 8761 }, { "epoch": 1.12, "grad_norm": 0.5761660222577973, "learning_rate": 4.3033740185057335e-06, "loss": 0.451, "step": 8762 }, { "epoch": 1.12, "grad_norm": 0.5597541850739801, "learning_rate": 4.3023525085155625e-06, "loss": 0.458, "step": 8763 }, { "epoch": 1.12, "grad_norm": 0.666625679379583, "learning_rate": 4.301331028222016e-06, "loss": 0.4437, "step": 8764 }, { "epoch": 1.12, "grad_norm": 0.6442802108456889, "learning_rate": 4.300309577668575e-06, "loss": 0.4981, "step": 8765 }, { "epoch": 1.12, "grad_norm": 0.5518759462909568, "learning_rate": 4.2992881568987175e-06, "loss": 0.4141, "step": 8766 }, { "epoch": 1.12, "grad_norm": 0.7275051122919908, "learning_rate": 4.298266765955926e-06, "loss": 0.4797, "step": 8767 }, { "epoch": 1.12, "grad_norm": 0.826745839680901, "learning_rate": 4.297245404883676e-06, "loss": 0.6154, "step": 8768 }, { "epoch": 1.12, "grad_norm": 0.7263518334293324, "learning_rate": 4.296224073725443e-06, "loss": 0.52, "step": 8769 }, { "epoch": 1.12, "grad_norm": 0.5940595349038942, "learning_rate": 4.2952027725247025e-06, "loss": 0.4792, "step": 8770 }, { "epoch": 1.12, "grad_norm": 0.7666531327699513, "learning_rate": 4.294181501324928e-06, "loss": 0.5156, "step": 8771 }, { "epoch": 1.12, "grad_norm": 0.5711495507530732, "learning_rate": 4.293160260169591e-06, "loss": 0.477, "step": 8772 }, { "epoch": 1.12, "grad_norm": 0.6589047347320118, "learning_rate": 4.292139049102162e-06, "loss": 0.4651, "step": 8773 }, { "epoch": 1.12, "grad_norm": 0.5847462779913895, "learning_rate": 4.291117868166113e-06, "loss": 0.5123, "step": 8774 }, { "epoch": 1.12, "grad_norm": 0.7016365497518163, "learning_rate": 4.29009671740491e-06, "loss": 0.5135, "step": 8775 }, { "epoch": 1.12, "grad_norm": 0.74615706784095, "learning_rate": 4.289075596862022e-06, "loss": 0.4529, "step": 8776 }, { "epoch": 1.12, "grad_norm": 0.8234423431622463, "learning_rate": 4.288054506580914e-06, "loss": 0.5127, "step": 8777 }, { "epoch": 1.12, "grad_norm": 0.7751321035269879, "learning_rate": 4.287033446605051e-06, "loss": 0.5716, "step": 8778 }, { "epoch": 1.12, "grad_norm": 0.8007655043186032, "learning_rate": 4.2860124169778964e-06, "loss": 0.5661, "step": 8779 }, { "epoch": 1.12, "grad_norm": 0.5711652050877859, "learning_rate": 4.28499141774291e-06, "loss": 0.4334, "step": 8780 }, { "epoch": 1.12, "grad_norm": 0.7716805471039628, "learning_rate": 4.283970448943555e-06, "loss": 0.4357, "step": 8781 }, { "epoch": 1.12, "grad_norm": 0.5555181498804138, "learning_rate": 4.2829495106232895e-06, "loss": 0.425, "step": 8782 }, { "epoch": 1.12, "grad_norm": 0.5868305371839402, "learning_rate": 4.281928602825573e-06, "loss": 0.4803, "step": 8783 }, { "epoch": 1.12, "grad_norm": 0.6788657402725402, "learning_rate": 4.280907725593861e-06, "loss": 0.4927, "step": 8784 }, { "epoch": 1.12, "grad_norm": 0.7123124466256725, "learning_rate": 4.279886878971608e-06, "loss": 0.5404, "step": 8785 }, { "epoch": 1.12, "grad_norm": 0.7126931129527746, "learning_rate": 4.278866063002268e-06, "loss": 0.5243, "step": 8786 }, { "epoch": 1.12, "grad_norm": 0.6864404507368663, "learning_rate": 4.277845277729298e-06, "loss": 0.5005, "step": 8787 }, { "epoch": 1.12, "grad_norm": 0.7678498373500076, "learning_rate": 4.276824523196146e-06, "loss": 0.5279, "step": 8788 }, { "epoch": 1.12, "grad_norm": 0.6298741301068632, "learning_rate": 4.275803799446263e-06, "loss": 0.4684, "step": 8789 }, { "epoch": 1.12, "grad_norm": 0.8026094621604701, "learning_rate": 4.274783106523099e-06, "loss": 0.573, "step": 8790 }, { "epoch": 1.12, "grad_norm": 0.65629232752146, "learning_rate": 4.2737624444701e-06, "loss": 0.5351, "step": 8791 }, { "epoch": 1.12, "grad_norm": 0.8464828505114691, "learning_rate": 4.272741813330713e-06, "loss": 0.5553, "step": 8792 }, { "epoch": 1.12, "grad_norm": 0.756340470284226, "learning_rate": 4.271721213148384e-06, "loss": 0.5518, "step": 8793 }, { "epoch": 1.12, "grad_norm": 0.7783301469637252, "learning_rate": 4.270700643966555e-06, "loss": 0.5199, "step": 8794 }, { "epoch": 1.12, "grad_norm": 0.6560574216946661, "learning_rate": 4.2696801058286685e-06, "loss": 0.4563, "step": 8795 }, { "epoch": 1.12, "grad_norm": 0.5899349082505061, "learning_rate": 4.268659598778166e-06, "loss": 0.4473, "step": 8796 }, { "epoch": 1.12, "grad_norm": 0.6963688927216121, "learning_rate": 4.267639122858488e-06, "loss": 0.4467, "step": 8797 }, { "epoch": 1.12, "grad_norm": 0.6126800030936337, "learning_rate": 4.2666186781130706e-06, "loss": 0.4689, "step": 8798 }, { "epoch": 1.12, "grad_norm": 0.7855198376686179, "learning_rate": 4.265598264585355e-06, "loss": 0.5468, "step": 8799 }, { "epoch": 1.12, "grad_norm": 0.9390285293092171, "learning_rate": 4.264577882318773e-06, "loss": 0.5517, "step": 8800 }, { "epoch": 1.12, "grad_norm": 0.6855521345250943, "learning_rate": 4.263557531356761e-06, "loss": 0.4548, "step": 8801 }, { "epoch": 1.12, "grad_norm": 0.5769063696116823, "learning_rate": 4.262537211742752e-06, "loss": 0.4425, "step": 8802 }, { "epoch": 1.12, "grad_norm": 0.5732954391046863, "learning_rate": 4.261516923520177e-06, "loss": 0.4125, "step": 8803 }, { "epoch": 1.12, "grad_norm": 0.5956499422494753, "learning_rate": 4.260496666732466e-06, "loss": 0.4549, "step": 8804 }, { "epoch": 1.12, "grad_norm": 0.6353305042450674, "learning_rate": 4.25947644142305e-06, "loss": 0.4711, "step": 8805 }, { "epoch": 1.12, "grad_norm": 0.9048638763083205, "learning_rate": 4.2584562476353545e-06, "loss": 0.5383, "step": 8806 }, { "epoch": 1.12, "grad_norm": 0.7014172119541553, "learning_rate": 4.257436085412807e-06, "loss": 0.5191, "step": 8807 }, { "epoch": 1.12, "grad_norm": 0.9372793001861245, "learning_rate": 4.256415954798833e-06, "loss": 0.4708, "step": 8808 }, { "epoch": 1.12, "grad_norm": 0.6538557046016998, "learning_rate": 4.255395855836856e-06, "loss": 0.442, "step": 8809 }, { "epoch": 1.12, "grad_norm": 0.6381707019285497, "learning_rate": 4.2543757885702956e-06, "loss": 0.4181, "step": 8810 }, { "epoch": 1.12, "grad_norm": 0.580561300876263, "learning_rate": 4.253355753042576e-06, "loss": 0.4428, "step": 8811 }, { "epoch": 1.12, "grad_norm": 0.5893239744886439, "learning_rate": 4.252335749297117e-06, "loss": 0.4486, "step": 8812 }, { "epoch": 1.12, "grad_norm": 0.6898567139437685, "learning_rate": 4.2513157773773365e-06, "loss": 0.4652, "step": 8813 }, { "epoch": 1.12, "grad_norm": 0.7056015871467056, "learning_rate": 4.2502958373266504e-06, "loss": 0.5056, "step": 8814 }, { "epoch": 1.12, "grad_norm": 0.5974202773834117, "learning_rate": 4.249275929188475e-06, "loss": 0.4249, "step": 8815 }, { "epoch": 1.12, "grad_norm": 0.6334414090393656, "learning_rate": 4.248256053006224e-06, "loss": 0.4754, "step": 8816 }, { "epoch": 1.12, "grad_norm": 0.7799495036823637, "learning_rate": 4.247236208823311e-06, "loss": 0.4843, "step": 8817 }, { "epoch": 1.12, "grad_norm": 0.5887747793367608, "learning_rate": 4.246216396683147e-06, "loss": 0.527, "step": 8818 }, { "epoch": 1.12, "grad_norm": 1.3043297308638795, "learning_rate": 4.245196616629142e-06, "loss": 0.546, "step": 8819 }, { "epoch": 1.12, "grad_norm": 0.8643685450581178, "learning_rate": 4.2441768687047055e-06, "loss": 0.5224, "step": 8820 }, { "epoch": 1.12, "grad_norm": 0.6272675124275903, "learning_rate": 4.243157152953244e-06, "loss": 0.481, "step": 8821 }, { "epoch": 1.12, "grad_norm": 1.1797957295211192, "learning_rate": 4.242137469418164e-06, "loss": 0.4391, "step": 8822 }, { "epoch": 1.12, "grad_norm": 3.0286094590805708, "learning_rate": 4.241117818142869e-06, "loss": 0.5224, "step": 8823 }, { "epoch": 1.12, "grad_norm": 0.7572385102685983, "learning_rate": 4.2400981991707654e-06, "loss": 0.4413, "step": 8824 }, { "epoch": 1.12, "grad_norm": 0.6005078606790096, "learning_rate": 4.239078612545252e-06, "loss": 0.4571, "step": 8825 }, { "epoch": 1.12, "grad_norm": 0.6661899109138069, "learning_rate": 4.238059058309731e-06, "loss": 0.444, "step": 8826 }, { "epoch": 1.12, "grad_norm": 0.8352314843219187, "learning_rate": 4.2370395365076e-06, "loss": 0.5569, "step": 8827 }, { "epoch": 1.12, "grad_norm": 0.709840176708013, "learning_rate": 4.236020047182259e-06, "loss": 0.5002, "step": 8828 }, { "epoch": 1.12, "grad_norm": 0.5991316827714752, "learning_rate": 4.235000590377103e-06, "loss": 0.4729, "step": 8829 }, { "epoch": 1.12, "grad_norm": 0.5708281681060035, "learning_rate": 4.233981166135527e-06, "loss": 0.456, "step": 8830 }, { "epoch": 1.13, "grad_norm": 0.8995247446030765, "learning_rate": 4.2329617745009255e-06, "loss": 0.523, "step": 8831 }, { "epoch": 1.13, "grad_norm": 0.6005560682566539, "learning_rate": 4.2319424155166896e-06, "loss": 0.5023, "step": 8832 }, { "epoch": 1.13, "grad_norm": 0.6794776933384548, "learning_rate": 4.230923089226212e-06, "loss": 0.4945, "step": 8833 }, { "epoch": 1.13, "grad_norm": 0.8680528274323568, "learning_rate": 4.22990379567288e-06, "loss": 0.5473, "step": 8834 }, { "epoch": 1.13, "grad_norm": 0.7804034149022726, "learning_rate": 4.22888453490008e-06, "loss": 0.5322, "step": 8835 }, { "epoch": 1.13, "grad_norm": 1.2199721106463128, "learning_rate": 4.227865306951205e-06, "loss": 0.4914, "step": 8836 }, { "epoch": 1.13, "grad_norm": 0.7822910630972447, "learning_rate": 4.226846111869634e-06, "loss": 0.515, "step": 8837 }, { "epoch": 1.13, "grad_norm": 0.5832904899114207, "learning_rate": 4.225826949698756e-06, "loss": 0.4443, "step": 8838 }, { "epoch": 1.13, "grad_norm": 0.7241557654056496, "learning_rate": 4.224807820481949e-06, "loss": 0.4813, "step": 8839 }, { "epoch": 1.13, "grad_norm": 0.7094004004828398, "learning_rate": 4.223788724262597e-06, "loss": 0.5581, "step": 8840 }, { "epoch": 1.13, "grad_norm": 0.7708877712455543, "learning_rate": 4.222769661084078e-06, "loss": 0.5518, "step": 8841 }, { "epoch": 1.13, "grad_norm": 0.6136329442266498, "learning_rate": 4.22175063098977e-06, "loss": 0.4447, "step": 8842 }, { "epoch": 1.13, "grad_norm": 0.6334621032670936, "learning_rate": 4.2207316340230514e-06, "loss": 0.4978, "step": 8843 }, { "epoch": 1.13, "grad_norm": 0.6974682168905553, "learning_rate": 4.219712670227297e-06, "loss": 0.4957, "step": 8844 }, { "epoch": 1.13, "grad_norm": 0.6308555769922878, "learning_rate": 4.218693739645881e-06, "loss": 0.4816, "step": 8845 }, { "epoch": 1.13, "grad_norm": 0.7268561989071667, "learning_rate": 4.217674842322176e-06, "loss": 0.5463, "step": 8846 }, { "epoch": 1.13, "grad_norm": 0.5778709276870324, "learning_rate": 4.216655978299552e-06, "loss": 0.4358, "step": 8847 }, { "epoch": 1.13, "grad_norm": 0.5998293710339382, "learning_rate": 4.215637147621378e-06, "loss": 0.4907, "step": 8848 }, { "epoch": 1.13, "grad_norm": 0.719870876190144, "learning_rate": 4.214618350331027e-06, "loss": 0.5108, "step": 8849 }, { "epoch": 1.13, "grad_norm": 0.7255538086626865, "learning_rate": 4.213599586471863e-06, "loss": 0.5299, "step": 8850 }, { "epoch": 1.13, "grad_norm": 0.6374626330450108, "learning_rate": 4.212580856087251e-06, "loss": 0.451, "step": 8851 }, { "epoch": 1.13, "grad_norm": 0.7172841179198404, "learning_rate": 4.2115621592205565e-06, "loss": 0.5047, "step": 8852 }, { "epoch": 1.13, "grad_norm": 0.8912355896496387, "learning_rate": 4.210543495915141e-06, "loss": 0.5374, "step": 8853 }, { "epoch": 1.13, "grad_norm": 0.6367142068190395, "learning_rate": 4.2095248662143666e-06, "loss": 0.4474, "step": 8854 }, { "epoch": 1.13, "grad_norm": 0.6315193094392435, "learning_rate": 4.2085062701615916e-06, "loss": 0.4302, "step": 8855 }, { "epoch": 1.13, "grad_norm": 0.7011549418738294, "learning_rate": 4.207487707800176e-06, "loss": 0.4922, "step": 8856 }, { "epoch": 1.13, "grad_norm": 0.6902072334270198, "learning_rate": 4.206469179173476e-06, "loss": 0.4887, "step": 8857 }, { "epoch": 1.13, "grad_norm": 0.5876436355786838, "learning_rate": 4.205450684324847e-06, "loss": 0.4639, "step": 8858 }, { "epoch": 1.13, "grad_norm": 0.5818643807679521, "learning_rate": 4.2044322232976444e-06, "loss": 0.4423, "step": 8859 }, { "epoch": 1.13, "grad_norm": 0.568966457609983, "learning_rate": 4.203413796135217e-06, "loss": 0.4479, "step": 8860 }, { "epoch": 1.13, "grad_norm": 0.6786900564098186, "learning_rate": 4.202395402880921e-06, "loss": 0.477, "step": 8861 }, { "epoch": 1.13, "grad_norm": 0.7166849866313526, "learning_rate": 4.201377043578103e-06, "loss": 0.4841, "step": 8862 }, { "epoch": 1.13, "grad_norm": 0.5869151770572896, "learning_rate": 4.200358718270114e-06, "loss": 0.4616, "step": 8863 }, { "epoch": 1.13, "grad_norm": 0.7906584046026495, "learning_rate": 4.199340427000298e-06, "loss": 0.5174, "step": 8864 }, { "epoch": 1.13, "grad_norm": 0.7584836030933219, "learning_rate": 4.198322169812002e-06, "loss": 0.5124, "step": 8865 }, { "epoch": 1.13, "grad_norm": 0.7853310210595729, "learning_rate": 4.197303946748569e-06, "loss": 0.4599, "step": 8866 }, { "epoch": 1.13, "grad_norm": 0.677042962099927, "learning_rate": 4.196285757853342e-06, "loss": 0.4626, "step": 8867 }, { "epoch": 1.13, "grad_norm": 0.589929344842882, "learning_rate": 4.195267603169662e-06, "loss": 0.4007, "step": 8868 }, { "epoch": 1.13, "grad_norm": 0.5947996911596473, "learning_rate": 4.1942494827408685e-06, "loss": 0.4221, "step": 8869 }, { "epoch": 1.13, "grad_norm": 0.6595734885032779, "learning_rate": 4.1932313966103e-06, "loss": 0.4976, "step": 8870 }, { "epoch": 1.13, "grad_norm": 1.1396244402972773, "learning_rate": 4.192213344821293e-06, "loss": 0.4839, "step": 8871 }, { "epoch": 1.13, "grad_norm": 0.588643884617622, "learning_rate": 4.191195327417179e-06, "loss": 0.4778, "step": 8872 }, { "epoch": 1.13, "grad_norm": 0.5849963640387403, "learning_rate": 4.190177344441299e-06, "loss": 0.4616, "step": 8873 }, { "epoch": 1.13, "grad_norm": 0.6118014658680612, "learning_rate": 4.189159395936982e-06, "loss": 0.4733, "step": 8874 }, { "epoch": 1.13, "grad_norm": 0.5344916359361866, "learning_rate": 4.188141481947558e-06, "loss": 0.409, "step": 8875 }, { "epoch": 1.13, "grad_norm": 0.5913422283984344, "learning_rate": 4.1871236025163555e-06, "loss": 0.503, "step": 8876 }, { "epoch": 1.13, "grad_norm": 0.7905925305725864, "learning_rate": 4.186105757686705e-06, "loss": 0.5289, "step": 8877 }, { "epoch": 1.13, "grad_norm": 0.6753084030534934, "learning_rate": 4.185087947501931e-06, "loss": 0.4665, "step": 8878 }, { "epoch": 1.13, "grad_norm": 0.5571779321620085, "learning_rate": 4.184070172005359e-06, "loss": 0.4388, "step": 8879 }, { "epoch": 1.13, "grad_norm": 0.6044655266484702, "learning_rate": 4.183052431240312e-06, "loss": 0.4475, "step": 8880 }, { "epoch": 1.13, "grad_norm": 1.2723111294082243, "learning_rate": 4.182034725250114e-06, "loss": 0.4747, "step": 8881 }, { "epoch": 1.13, "grad_norm": 1.0176464400152592, "learning_rate": 4.1810170540780826e-06, "loss": 0.4841, "step": 8882 }, { "epoch": 1.13, "grad_norm": 0.7675081835243758, "learning_rate": 4.179999417767539e-06, "loss": 0.5361, "step": 8883 }, { "epoch": 1.13, "grad_norm": 0.7027056699374664, "learning_rate": 4.178981816361799e-06, "loss": 0.4888, "step": 8884 }, { "epoch": 1.13, "grad_norm": 0.6187308169977014, "learning_rate": 4.177964249904179e-06, "loss": 0.4736, "step": 8885 }, { "epoch": 1.13, "grad_norm": 0.6993935695545946, "learning_rate": 4.176946718437994e-06, "loss": 0.5236, "step": 8886 }, { "epoch": 1.13, "grad_norm": 0.8617606858911112, "learning_rate": 4.175929222006558e-06, "loss": 0.5814, "step": 8887 }, { "epoch": 1.13, "grad_norm": 0.6173512784789451, "learning_rate": 4.174911760653182e-06, "loss": 0.4179, "step": 8888 }, { "epoch": 1.13, "grad_norm": 0.6606884887732691, "learning_rate": 4.1738943344211735e-06, "loss": 0.4781, "step": 8889 }, { "epoch": 1.13, "grad_norm": 0.9421626490039012, "learning_rate": 4.172876943353845e-06, "loss": 0.5081, "step": 8890 }, { "epoch": 1.13, "grad_norm": 0.6840009487430139, "learning_rate": 4.171859587494502e-06, "loss": 0.4728, "step": 8891 }, { "epoch": 1.13, "grad_norm": 0.6360795948164695, "learning_rate": 4.17084226688645e-06, "loss": 0.4867, "step": 8892 }, { "epoch": 1.13, "grad_norm": 0.8072297640581896, "learning_rate": 4.169824981572993e-06, "loss": 0.5327, "step": 8893 }, { "epoch": 1.13, "grad_norm": 0.6302079145475248, "learning_rate": 4.168807731597434e-06, "loss": 0.4216, "step": 8894 }, { "epoch": 1.13, "grad_norm": 0.6265915277445072, "learning_rate": 4.167790517003074e-06, "loss": 0.4466, "step": 8895 }, { "epoch": 1.13, "grad_norm": 0.8306004570823654, "learning_rate": 4.166773337833212e-06, "loss": 0.5239, "step": 8896 }, { "epoch": 1.13, "grad_norm": 1.0538530611806352, "learning_rate": 4.165756194131145e-06, "loss": 0.51, "step": 8897 }, { "epoch": 1.13, "grad_norm": 0.7418913940773039, "learning_rate": 4.164739085940172e-06, "loss": 0.5105, "step": 8898 }, { "epoch": 1.13, "grad_norm": 0.6867392525629221, "learning_rate": 4.163722013303587e-06, "loss": 0.5311, "step": 8899 }, { "epoch": 1.13, "grad_norm": 0.738323713708081, "learning_rate": 4.1627049762646845e-06, "loss": 0.5327, "step": 8900 }, { "epoch": 1.13, "grad_norm": 0.6522210674657647, "learning_rate": 4.1616879748667545e-06, "loss": 0.494, "step": 8901 }, { "epoch": 1.13, "grad_norm": 0.8231186804607882, "learning_rate": 4.1606710091530885e-06, "loss": 0.4958, "step": 8902 }, { "epoch": 1.13, "grad_norm": 0.5755750166470923, "learning_rate": 4.159654079166976e-06, "loss": 0.4581, "step": 8903 }, { "epoch": 1.13, "grad_norm": 0.7916153775811734, "learning_rate": 4.158637184951704e-06, "loss": 0.5453, "step": 8904 }, { "epoch": 1.13, "grad_norm": 0.8241823371799982, "learning_rate": 4.157620326550558e-06, "loss": 0.575, "step": 8905 }, { "epoch": 1.13, "grad_norm": 0.8173129061707199, "learning_rate": 4.156603504006824e-06, "loss": 0.5308, "step": 8906 }, { "epoch": 1.13, "grad_norm": 0.5174368334878502, "learning_rate": 4.1555867173637825e-06, "loss": 0.4307, "step": 8907 }, { "epoch": 1.13, "grad_norm": 0.6941899279197069, "learning_rate": 4.154569966664716e-06, "loss": 0.5053, "step": 8908 }, { "epoch": 1.13, "grad_norm": 0.7752276093695819, "learning_rate": 4.1535532519529056e-06, "loss": 0.4935, "step": 8909 }, { "epoch": 1.14, "grad_norm": 0.5709264687011458, "learning_rate": 4.152536573271625e-06, "loss": 0.4682, "step": 8910 }, { "epoch": 1.14, "grad_norm": 0.8372322005711126, "learning_rate": 4.151519930664158e-06, "loss": 0.5747, "step": 8911 }, { "epoch": 1.14, "grad_norm": 0.623406384653625, "learning_rate": 4.150503324173775e-06, "loss": 0.4411, "step": 8912 }, { "epoch": 1.14, "grad_norm": 0.6222006459899796, "learning_rate": 4.14948675384375e-06, "loss": 0.4718, "step": 8913 }, { "epoch": 1.14, "grad_norm": 0.6049654092042297, "learning_rate": 4.148470219717358e-06, "loss": 0.4993, "step": 8914 }, { "epoch": 1.14, "grad_norm": 0.7745050340158272, "learning_rate": 4.147453721837866e-06, "loss": 0.5273, "step": 8915 }, { "epoch": 1.14, "grad_norm": 0.6109987895486814, "learning_rate": 4.146437260248546e-06, "loss": 0.4503, "step": 8916 }, { "epoch": 1.14, "grad_norm": 0.6793429391527532, "learning_rate": 4.145420834992664e-06, "loss": 0.5184, "step": 8917 }, { "epoch": 1.14, "grad_norm": 0.8818497858691048, "learning_rate": 4.144404446113486e-06, "loss": 0.5765, "step": 8918 }, { "epoch": 1.14, "grad_norm": 1.021306485742972, "learning_rate": 4.143388093654276e-06, "loss": 0.5162, "step": 8919 }, { "epoch": 1.14, "grad_norm": 0.5859749858002101, "learning_rate": 4.142371777658299e-06, "loss": 0.4239, "step": 8920 }, { "epoch": 1.14, "grad_norm": 0.6299049601946107, "learning_rate": 4.141355498168813e-06, "loss": 0.4581, "step": 8921 }, { "epoch": 1.14, "grad_norm": 0.8455046076539325, "learning_rate": 4.140339255229079e-06, "loss": 0.5296, "step": 8922 }, { "epoch": 1.14, "grad_norm": 1.136195459596776, "learning_rate": 4.139323048882357e-06, "loss": 0.5579, "step": 8923 }, { "epoch": 1.14, "grad_norm": 0.7686804183218642, "learning_rate": 4.138306879171903e-06, "loss": 0.5021, "step": 8924 }, { "epoch": 1.14, "grad_norm": 0.6137399353408208, "learning_rate": 4.137290746140972e-06, "loss": 0.4575, "step": 8925 }, { "epoch": 1.14, "grad_norm": 0.8048452011345688, "learning_rate": 4.136274649832816e-06, "loss": 0.4721, "step": 8926 }, { "epoch": 1.14, "grad_norm": 0.6495430245986274, "learning_rate": 4.135258590290688e-06, "loss": 0.4444, "step": 8927 }, { "epoch": 1.14, "grad_norm": 0.847925245009506, "learning_rate": 4.13424256755784e-06, "loss": 0.5336, "step": 8928 }, { "epoch": 1.14, "grad_norm": 0.6373667724772821, "learning_rate": 4.133226581677518e-06, "loss": 0.4617, "step": 8929 }, { "epoch": 1.14, "grad_norm": 0.6544387934433573, "learning_rate": 4.132210632692971e-06, "loss": 0.4975, "step": 8930 }, { "epoch": 1.14, "grad_norm": 0.952005884589329, "learning_rate": 4.131194720647445e-06, "loss": 0.5441, "step": 8931 }, { "epoch": 1.14, "grad_norm": 0.7160581309498305, "learning_rate": 4.130178845584183e-06, "loss": 0.4449, "step": 8932 }, { "epoch": 1.14, "grad_norm": 0.606984407071753, "learning_rate": 4.129163007546427e-06, "loss": 0.4834, "step": 8933 }, { "epoch": 1.14, "grad_norm": 0.6049660768632178, "learning_rate": 4.128147206577417e-06, "loss": 0.4182, "step": 8934 }, { "epoch": 1.14, "grad_norm": 1.016503241966969, "learning_rate": 4.127131442720398e-06, "loss": 0.4922, "step": 8935 }, { "epoch": 1.14, "grad_norm": 0.5893463205746421, "learning_rate": 4.126115716018603e-06, "loss": 0.4684, "step": 8936 }, { "epoch": 1.14, "grad_norm": 0.6885041023236379, "learning_rate": 4.12510002651527e-06, "loss": 0.4548, "step": 8937 }, { "epoch": 1.14, "grad_norm": 0.6365596791560154, "learning_rate": 4.124084374253633e-06, "loss": 0.4616, "step": 8938 }, { "epoch": 1.14, "grad_norm": 0.7166848715376369, "learning_rate": 4.123068759276925e-06, "loss": 0.5603, "step": 8939 }, { "epoch": 1.14, "grad_norm": 0.8134642852047768, "learning_rate": 4.1220531816283775e-06, "loss": 0.5208, "step": 8940 }, { "epoch": 1.14, "grad_norm": 0.9409062362021231, "learning_rate": 4.121037641351221e-06, "loss": 0.5247, "step": 8941 }, { "epoch": 1.14, "grad_norm": 0.5593972316517262, "learning_rate": 4.120022138488683e-06, "loss": 0.4282, "step": 8942 }, { "epoch": 1.14, "grad_norm": 0.6206467823627109, "learning_rate": 4.119006673083991e-06, "loss": 0.5663, "step": 8943 }, { "epoch": 1.14, "grad_norm": 1.049580058284063, "learning_rate": 4.117991245180369e-06, "loss": 0.4577, "step": 8944 }, { "epoch": 1.14, "grad_norm": 0.5608642755061072, "learning_rate": 4.116975854821041e-06, "loss": 0.4343, "step": 8945 }, { "epoch": 1.14, "grad_norm": 0.6027099428056472, "learning_rate": 4.11596050204923e-06, "loss": 0.4791, "step": 8946 }, { "epoch": 1.14, "grad_norm": 0.5888289668248263, "learning_rate": 4.114945186908154e-06, "loss": 0.4461, "step": 8947 }, { "epoch": 1.14, "grad_norm": 0.5931289785112412, "learning_rate": 4.113929909441034e-06, "loss": 0.4489, "step": 8948 }, { "epoch": 1.14, "grad_norm": 0.6288183169782241, "learning_rate": 4.112914669691086e-06, "loss": 0.466, "step": 8949 }, { "epoch": 1.14, "grad_norm": 2.130370526799863, "learning_rate": 4.1118994677015265e-06, "loss": 0.5343, "step": 8950 }, { "epoch": 1.14, "grad_norm": 0.6846604108310098, "learning_rate": 4.110884303515568e-06, "loss": 0.4754, "step": 8951 }, { "epoch": 1.14, "grad_norm": 0.571214086548935, "learning_rate": 4.109869177176424e-06, "loss": 0.4288, "step": 8952 }, { "epoch": 1.14, "grad_norm": 0.6284123393722311, "learning_rate": 4.108854088727304e-06, "loss": 0.4621, "step": 8953 }, { "epoch": 1.14, "grad_norm": 0.597164421679138, "learning_rate": 4.107839038211419e-06, "loss": 0.5202, "step": 8954 }, { "epoch": 1.14, "grad_norm": 0.7273766474979937, "learning_rate": 4.1068240256719746e-06, "loss": 0.4573, "step": 8955 }, { "epoch": 1.14, "grad_norm": 0.6700952731868057, "learning_rate": 4.105809051152177e-06, "loss": 0.4877, "step": 8956 }, { "epoch": 1.14, "grad_norm": 0.9662626445861817, "learning_rate": 4.104794114695231e-06, "loss": 0.5201, "step": 8957 }, { "epoch": 1.14, "grad_norm": 0.6337353218064731, "learning_rate": 4.103779216344339e-06, "loss": 0.4806, "step": 8958 }, { "epoch": 1.14, "grad_norm": 0.597347468555482, "learning_rate": 4.1027643561427e-06, "loss": 0.4529, "step": 8959 }, { "epoch": 1.14, "grad_norm": 0.7032296208176871, "learning_rate": 4.101749534133517e-06, "loss": 0.5353, "step": 8960 }, { "epoch": 1.14, "grad_norm": 0.8109327372478944, "learning_rate": 4.100734750359986e-06, "loss": 0.4953, "step": 8961 }, { "epoch": 1.14, "grad_norm": 0.5384636345651621, "learning_rate": 4.099720004865303e-06, "loss": 0.4236, "step": 8962 }, { "epoch": 1.14, "grad_norm": 0.5847172012755929, "learning_rate": 4.098705297692662e-06, "loss": 0.4492, "step": 8963 }, { "epoch": 1.14, "grad_norm": 0.6224556095872419, "learning_rate": 4.097690628885257e-06, "loss": 0.4971, "step": 8964 }, { "epoch": 1.14, "grad_norm": 0.8287472555661987, "learning_rate": 4.096675998486278e-06, "loss": 0.5301, "step": 8965 }, { "epoch": 1.14, "grad_norm": 0.7877916671790234, "learning_rate": 4.095661406538916e-06, "loss": 0.5595, "step": 8966 }, { "epoch": 1.14, "grad_norm": 0.6907176868516108, "learning_rate": 4.094646853086357e-06, "loss": 0.5248, "step": 8967 }, { "epoch": 1.14, "grad_norm": 0.6387418884297833, "learning_rate": 4.093632338171789e-06, "loss": 0.5222, "step": 8968 }, { "epoch": 1.14, "grad_norm": 0.6841759868077028, "learning_rate": 4.092617861838396e-06, "loss": 0.4741, "step": 8969 }, { "epoch": 1.14, "grad_norm": 0.5932066233094653, "learning_rate": 4.09160342412936e-06, "loss": 0.4517, "step": 8970 }, { "epoch": 1.14, "grad_norm": 0.5560793362032284, "learning_rate": 4.090589025087864e-06, "loss": 0.4687, "step": 8971 }, { "epoch": 1.14, "grad_norm": 0.67358657501909, "learning_rate": 4.089574664757086e-06, "loss": 0.503, "step": 8972 }, { "epoch": 1.14, "grad_norm": 0.5601399046352287, "learning_rate": 4.088560343180207e-06, "loss": 0.4659, "step": 8973 }, { "epoch": 1.14, "grad_norm": 0.5806752798821936, "learning_rate": 4.087546060400401e-06, "loss": 0.4736, "step": 8974 }, { "epoch": 1.14, "grad_norm": 0.890918080455701, "learning_rate": 4.0865318164608435e-06, "loss": 0.432, "step": 8975 }, { "epoch": 1.14, "grad_norm": 0.6062355402362681, "learning_rate": 4.085517611404708e-06, "loss": 0.5202, "step": 8976 }, { "epoch": 1.14, "grad_norm": 0.6896890431382324, "learning_rate": 4.0845034452751656e-06, "loss": 0.4868, "step": 8977 }, { "epoch": 1.14, "grad_norm": 0.6243644930165145, "learning_rate": 4.083489318115385e-06, "loss": 0.4485, "step": 8978 }, { "epoch": 1.14, "grad_norm": 0.6919195789594957, "learning_rate": 4.082475229968536e-06, "loss": 0.5431, "step": 8979 }, { "epoch": 1.14, "grad_norm": 0.9770142347219757, "learning_rate": 4.0814611808777845e-06, "loss": 0.5338, "step": 8980 }, { "epoch": 1.14, "grad_norm": 0.7213578510869765, "learning_rate": 4.080447170886296e-06, "loss": 0.481, "step": 8981 }, { "epoch": 1.14, "grad_norm": 0.6027346606298167, "learning_rate": 4.079433200037233e-06, "loss": 0.4404, "step": 8982 }, { "epoch": 1.14, "grad_norm": 0.6706439239375428, "learning_rate": 4.078419268373756e-06, "loss": 0.493, "step": 8983 }, { "epoch": 1.14, "grad_norm": 1.4632090335888754, "learning_rate": 4.077405375939023e-06, "loss": 0.5167, "step": 8984 }, { "epoch": 1.14, "grad_norm": 0.5743036022268803, "learning_rate": 4.076391522776198e-06, "loss": 0.4724, "step": 8985 }, { "epoch": 1.14, "grad_norm": 1.000783417638258, "learning_rate": 4.0753777089284345e-06, "loss": 0.4938, "step": 8986 }, { "epoch": 1.14, "grad_norm": 0.6789018943041462, "learning_rate": 4.074363934438888e-06, "loss": 0.4857, "step": 8987 }, { "epoch": 1.15, "grad_norm": 1.0172097982351374, "learning_rate": 4.07335019935071e-06, "loss": 0.503, "step": 8988 }, { "epoch": 1.15, "grad_norm": 0.5806497672232582, "learning_rate": 4.072336503707053e-06, "loss": 0.4173, "step": 8989 }, { "epoch": 1.15, "grad_norm": 0.6914722764442861, "learning_rate": 4.071322847551066e-06, "loss": 0.4729, "step": 8990 }, { "epoch": 1.15, "grad_norm": 0.7195830021633124, "learning_rate": 4.070309230925898e-06, "loss": 0.4907, "step": 8991 }, { "epoch": 1.15, "grad_norm": 0.6125785274309221, "learning_rate": 4.0692956538746966e-06, "loss": 0.4695, "step": 8992 }, { "epoch": 1.15, "grad_norm": 0.6989050923253964, "learning_rate": 4.068282116440604e-06, "loss": 0.4747, "step": 8993 }, { "epoch": 1.15, "grad_norm": 0.5832176441382122, "learning_rate": 4.067268618666764e-06, "loss": 0.4553, "step": 8994 }, { "epoch": 1.15, "grad_norm": 0.6889027638096344, "learning_rate": 4.066255160596319e-06, "loss": 0.5304, "step": 8995 }, { "epoch": 1.15, "grad_norm": 0.761549075424479, "learning_rate": 4.0652417422724054e-06, "loss": 0.5378, "step": 8996 }, { "epoch": 1.15, "grad_norm": 0.6770589232801114, "learning_rate": 4.064228363738166e-06, "loss": 0.5063, "step": 8997 }, { "epoch": 1.15, "grad_norm": 0.6509939339017758, "learning_rate": 4.063215025036735e-06, "loss": 0.492, "step": 8998 }, { "epoch": 1.15, "grad_norm": 0.8168004636442611, "learning_rate": 4.062201726211247e-06, "loss": 0.5236, "step": 8999 }, { "epoch": 1.15, "grad_norm": 0.8033615596745731, "learning_rate": 4.061188467304834e-06, "loss": 0.5269, "step": 9000 }, { "epoch": 1.15, "grad_norm": 0.638583581326899, "learning_rate": 4.060175248360629e-06, "loss": 0.4864, "step": 9001 }, { "epoch": 1.15, "grad_norm": 0.7500171028160009, "learning_rate": 4.05916206942176e-06, "loss": 0.4886, "step": 9002 }, { "epoch": 1.15, "grad_norm": 0.6828589949118525, "learning_rate": 4.058148930531355e-06, "loss": 0.5003, "step": 9003 }, { "epoch": 1.15, "grad_norm": 0.7868472436310205, "learning_rate": 4.05713583173254e-06, "loss": 0.5041, "step": 9004 }, { "epoch": 1.15, "grad_norm": 0.6050353891825929, "learning_rate": 4.056122773068441e-06, "loss": 0.4544, "step": 9005 }, { "epoch": 1.15, "grad_norm": 0.8083388533830902, "learning_rate": 4.055109754582178e-06, "loss": 0.4887, "step": 9006 }, { "epoch": 1.15, "grad_norm": 0.646573712630938, "learning_rate": 4.054096776316874e-06, "loss": 0.4645, "step": 9007 }, { "epoch": 1.15, "grad_norm": 0.672270820341946, "learning_rate": 4.053083838315647e-06, "loss": 0.4538, "step": 9008 }, { "epoch": 1.15, "grad_norm": 0.6628567131546937, "learning_rate": 4.052070940621614e-06, "loss": 0.432, "step": 9009 }, { "epoch": 1.15, "grad_norm": 0.6478913514779144, "learning_rate": 4.051058083277894e-06, "loss": 0.4497, "step": 9010 }, { "epoch": 1.15, "grad_norm": 0.5845577247889107, "learning_rate": 4.050045266327598e-06, "loss": 0.4767, "step": 9011 }, { "epoch": 1.15, "grad_norm": 1.0573059363922057, "learning_rate": 4.04903248981384e-06, "loss": 0.5476, "step": 9012 }, { "epoch": 1.15, "grad_norm": 0.6472651449472857, "learning_rate": 4.0480197537797286e-06, "loss": 0.4536, "step": 9013 }, { "epoch": 1.15, "grad_norm": 0.7598329002180562, "learning_rate": 4.047007058268376e-06, "loss": 0.5106, "step": 9014 }, { "epoch": 1.15, "grad_norm": 0.7673200262458939, "learning_rate": 4.045994403322887e-06, "loss": 0.5061, "step": 9015 }, { "epoch": 1.15, "grad_norm": 0.9184684277275242, "learning_rate": 4.044981788986367e-06, "loss": 0.4862, "step": 9016 }, { "epoch": 1.15, "grad_norm": 0.6969550444167093, "learning_rate": 4.043969215301922e-06, "loss": 0.5409, "step": 9017 }, { "epoch": 1.15, "grad_norm": 0.6221736352650029, "learning_rate": 4.042956682312651e-06, "loss": 0.476, "step": 9018 }, { "epoch": 1.15, "grad_norm": 0.6134582174370081, "learning_rate": 4.041944190061656e-06, "loss": 0.4461, "step": 9019 }, { "epoch": 1.15, "grad_norm": 0.631557806281466, "learning_rate": 4.040931738592036e-06, "loss": 0.4557, "step": 9020 }, { "epoch": 1.15, "grad_norm": 0.6065319619591251, "learning_rate": 4.039919327946886e-06, "loss": 0.4687, "step": 9021 }, { "epoch": 1.15, "grad_norm": 0.7023971073960384, "learning_rate": 4.038906958169303e-06, "loss": 0.4781, "step": 9022 }, { "epoch": 1.15, "grad_norm": 0.6913650205082353, "learning_rate": 4.0378946293023796e-06, "loss": 0.5183, "step": 9023 }, { "epoch": 1.15, "grad_norm": 0.7805906534382324, "learning_rate": 4.036882341389207e-06, "loss": 0.443, "step": 9024 }, { "epoch": 1.15, "grad_norm": 0.6412853220892517, "learning_rate": 4.035870094472876e-06, "loss": 0.4661, "step": 9025 }, { "epoch": 1.15, "grad_norm": 0.5563209632543178, "learning_rate": 4.034857888596474e-06, "loss": 0.3932, "step": 9026 }, { "epoch": 1.15, "grad_norm": 0.6111839346361122, "learning_rate": 4.033845723803088e-06, "loss": 0.4411, "step": 9027 }, { "epoch": 1.15, "grad_norm": 0.6244846543392127, "learning_rate": 4.032833600135802e-06, "loss": 0.462, "step": 9028 }, { "epoch": 1.15, "grad_norm": 0.6177985738441802, "learning_rate": 4.031821517637698e-06, "loss": 0.4518, "step": 9029 }, { "epoch": 1.15, "grad_norm": 0.6938530739007701, "learning_rate": 4.030809476351859e-06, "loss": 0.4707, "step": 9030 }, { "epoch": 1.15, "grad_norm": 0.6054436275252574, "learning_rate": 4.029797476321363e-06, "loss": 0.4833, "step": 9031 }, { "epoch": 1.15, "grad_norm": 0.6034519654419125, "learning_rate": 4.028785517589289e-06, "loss": 0.4635, "step": 9032 }, { "epoch": 1.15, "grad_norm": 0.8155015179816563, "learning_rate": 4.027773600198711e-06, "loss": 0.4319, "step": 9033 }, { "epoch": 1.15, "grad_norm": 0.6682705107658354, "learning_rate": 4.026761724192702e-06, "loss": 0.4966, "step": 9034 }, { "epoch": 1.15, "grad_norm": 0.7560401763168254, "learning_rate": 4.02574988961434e-06, "loss": 0.5027, "step": 9035 }, { "epoch": 1.15, "grad_norm": 0.6071252331146583, "learning_rate": 4.02473809650669e-06, "loss": 0.4222, "step": 9036 }, { "epoch": 1.15, "grad_norm": 0.5665434685751473, "learning_rate": 4.023726344912824e-06, "loss": 0.447, "step": 9037 }, { "epoch": 1.15, "grad_norm": 0.5770185103714324, "learning_rate": 4.0227146348758075e-06, "loss": 0.5072, "step": 9038 }, { "epoch": 1.15, "grad_norm": 0.5667922612344445, "learning_rate": 4.021702966438705e-06, "loss": 0.4384, "step": 9039 }, { "epoch": 1.15, "grad_norm": 0.6097516268978369, "learning_rate": 4.020691339644582e-06, "loss": 0.492, "step": 9040 }, { "epoch": 1.15, "grad_norm": 0.7460488808352627, "learning_rate": 4.0196797545364995e-06, "loss": 0.5028, "step": 9041 }, { "epoch": 1.15, "grad_norm": 0.6187724371519613, "learning_rate": 4.018668211157516e-06, "loss": 0.4718, "step": 9042 }, { "epoch": 1.15, "grad_norm": 0.7455951878050731, "learning_rate": 4.017656709550693e-06, "loss": 0.5081, "step": 9043 }, { "epoch": 1.15, "grad_norm": 0.7889960882207994, "learning_rate": 4.016645249759082e-06, "loss": 0.4602, "step": 9044 }, { "epoch": 1.15, "grad_norm": 0.6143357283415994, "learning_rate": 4.0156338318257425e-06, "loss": 0.4307, "step": 9045 }, { "epoch": 1.15, "grad_norm": 0.5838916648152049, "learning_rate": 4.014622455793723e-06, "loss": 0.4506, "step": 9046 }, { "epoch": 1.15, "grad_norm": 0.5911372939789319, "learning_rate": 4.013611121706077e-06, "loss": 0.5167, "step": 9047 }, { "epoch": 1.15, "grad_norm": 0.8257912464304906, "learning_rate": 4.012599829605855e-06, "loss": 0.4686, "step": 9048 }, { "epoch": 1.15, "grad_norm": 0.7150846440018089, "learning_rate": 4.011588579536103e-06, "loss": 0.5238, "step": 9049 }, { "epoch": 1.15, "grad_norm": 0.8119351417033279, "learning_rate": 4.010577371539867e-06, "loss": 0.4893, "step": 9050 }, { "epoch": 1.15, "grad_norm": 0.626663596403873, "learning_rate": 4.009566205660189e-06, "loss": 0.4948, "step": 9051 }, { "epoch": 1.15, "grad_norm": 0.8603723504016599, "learning_rate": 4.0085550819401134e-06, "loss": 0.5311, "step": 9052 }, { "epoch": 1.15, "grad_norm": 0.9767395437781328, "learning_rate": 4.00754400042268e-06, "loss": 0.5491, "step": 9053 }, { "epoch": 1.15, "grad_norm": 0.8276632949212203, "learning_rate": 4.006532961150927e-06, "loss": 0.4969, "step": 9054 }, { "epoch": 1.15, "grad_norm": 0.6460748235757424, "learning_rate": 4.005521964167891e-06, "loss": 0.4752, "step": 9055 }, { "epoch": 1.15, "grad_norm": 0.6507742802554809, "learning_rate": 4.004511009516607e-06, "loss": 0.4548, "step": 9056 }, { "epoch": 1.15, "grad_norm": 0.5536586439501516, "learning_rate": 4.003500097240109e-06, "loss": 0.4739, "step": 9057 }, { "epoch": 1.15, "grad_norm": 0.6861642040176475, "learning_rate": 4.002489227381425e-06, "loss": 0.4602, "step": 9058 }, { "epoch": 1.15, "grad_norm": 0.7315041685257905, "learning_rate": 4.001478399983589e-06, "loss": 0.4793, "step": 9059 }, { "epoch": 1.15, "grad_norm": 0.5938745482269892, "learning_rate": 4.000467615089626e-06, "loss": 0.4507, "step": 9060 }, { "epoch": 1.15, "grad_norm": 0.6054268412964247, "learning_rate": 3.999456872742564e-06, "loss": 0.4614, "step": 9061 }, { "epoch": 1.15, "grad_norm": 0.8533290144798112, "learning_rate": 3.9984461729854245e-06, "loss": 0.4816, "step": 9062 }, { "epoch": 1.15, "grad_norm": 0.7309372142370067, "learning_rate": 3.997435515861231e-06, "loss": 0.5166, "step": 9063 }, { "epoch": 1.15, "grad_norm": 0.5973625572814367, "learning_rate": 3.996424901413004e-06, "loss": 0.4724, "step": 9064 }, { "epoch": 1.15, "grad_norm": 0.7438344152897951, "learning_rate": 3.995414329683762e-06, "loss": 0.461, "step": 9065 }, { "epoch": 1.15, "grad_norm": 0.5631106322421988, "learning_rate": 3.994403800716523e-06, "loss": 0.4277, "step": 9066 }, { "epoch": 1.16, "grad_norm": 0.5838279354196818, "learning_rate": 3.993393314554299e-06, "loss": 0.4752, "step": 9067 }, { "epoch": 1.16, "grad_norm": 0.7605875248332076, "learning_rate": 3.992382871240106e-06, "loss": 0.4666, "step": 9068 }, { "epoch": 1.16, "grad_norm": 0.680995340606293, "learning_rate": 3.991372470816954e-06, "loss": 0.4718, "step": 9069 }, { "epoch": 1.16, "grad_norm": 0.5152597336678216, "learning_rate": 3.990362113327853e-06, "loss": 0.3887, "step": 9070 }, { "epoch": 1.16, "grad_norm": 0.6079573837752223, "learning_rate": 3.989351798815808e-06, "loss": 0.4939, "step": 9071 }, { "epoch": 1.16, "grad_norm": 0.8132652654058701, "learning_rate": 3.98834152732383e-06, "loss": 0.4989, "step": 9072 }, { "epoch": 1.16, "grad_norm": 0.5725514724098311, "learning_rate": 3.98733129889492e-06, "loss": 0.4218, "step": 9073 }, { "epoch": 1.16, "grad_norm": 0.5594307124063322, "learning_rate": 3.98632111357208e-06, "loss": 0.465, "step": 9074 }, { "epoch": 1.16, "grad_norm": 0.5908550158977094, "learning_rate": 3.98531097139831e-06, "loss": 0.4296, "step": 9075 }, { "epoch": 1.16, "grad_norm": 0.7342108400684618, "learning_rate": 3.984300872416612e-06, "loss": 0.552, "step": 9076 }, { "epoch": 1.16, "grad_norm": 0.8040761807166786, "learning_rate": 3.983290816669979e-06, "loss": 0.5334, "step": 9077 }, { "epoch": 1.16, "grad_norm": 0.7062037775856744, "learning_rate": 3.982280804201407e-06, "loss": 0.4834, "step": 9078 }, { "epoch": 1.16, "grad_norm": 0.7694091459615919, "learning_rate": 3.981270835053889e-06, "loss": 0.474, "step": 9079 }, { "epoch": 1.16, "grad_norm": 0.6418411681188796, "learning_rate": 3.980260909270416e-06, "loss": 0.4525, "step": 9080 }, { "epoch": 1.16, "grad_norm": 0.5865001399814008, "learning_rate": 3.979251026893977e-06, "loss": 0.4726, "step": 9081 }, { "epoch": 1.16, "grad_norm": 0.7546366528818873, "learning_rate": 3.978241187967561e-06, "loss": 0.5606, "step": 9082 }, { "epoch": 1.16, "grad_norm": 0.7466575105866234, "learning_rate": 3.977231392534152e-06, "loss": 0.5011, "step": 9083 }, { "epoch": 1.16, "grad_norm": 0.5675433021653249, "learning_rate": 3.976221640636734e-06, "loss": 0.4636, "step": 9084 }, { "epoch": 1.16, "grad_norm": 0.6245798588680239, "learning_rate": 3.9752119323182906e-06, "loss": 0.4712, "step": 9085 }, { "epoch": 1.16, "grad_norm": 0.8089754289607965, "learning_rate": 3.974202267621799e-06, "loss": 0.5458, "step": 9086 }, { "epoch": 1.16, "grad_norm": 0.7455968135757399, "learning_rate": 3.973192646590239e-06, "loss": 0.5792, "step": 9087 }, { "epoch": 1.16, "grad_norm": 0.80761384602154, "learning_rate": 3.972183069266588e-06, "loss": 0.5336, "step": 9088 }, { "epoch": 1.16, "grad_norm": 0.7661715973771724, "learning_rate": 3.971173535693819e-06, "loss": 0.4915, "step": 9089 }, { "epoch": 1.16, "grad_norm": 0.6546664827639979, "learning_rate": 3.9701640459149045e-06, "loss": 0.471, "step": 9090 }, { "epoch": 1.16, "grad_norm": 0.736428828910854, "learning_rate": 3.969154599972815e-06, "loss": 0.449, "step": 9091 }, { "epoch": 1.16, "grad_norm": 0.7050627946695277, "learning_rate": 3.968145197910521e-06, "loss": 0.5227, "step": 9092 }, { "epoch": 1.16, "grad_norm": 0.8317858416370292, "learning_rate": 3.967135839770989e-06, "loss": 0.5211, "step": 9093 }, { "epoch": 1.16, "grad_norm": 0.6304214899789228, "learning_rate": 3.966126525597182e-06, "loss": 0.4257, "step": 9094 }, { "epoch": 1.16, "grad_norm": 0.5291623505855394, "learning_rate": 3.965117255432067e-06, "loss": 0.4246, "step": 9095 }, { "epoch": 1.16, "grad_norm": 0.5870379339359486, "learning_rate": 3.9641080293186005e-06, "loss": 0.4106, "step": 9096 }, { "epoch": 1.16, "grad_norm": 0.5793816523184112, "learning_rate": 3.963098847299746e-06, "loss": 0.4289, "step": 9097 }, { "epoch": 1.16, "grad_norm": 0.613706039667801, "learning_rate": 3.962089709418463e-06, "loss": 0.4932, "step": 9098 }, { "epoch": 1.16, "grad_norm": 0.963706234550816, "learning_rate": 3.961080615717702e-06, "loss": 0.4763, "step": 9099 }, { "epoch": 1.16, "grad_norm": 0.7744957939857139, "learning_rate": 3.96007156624042e-06, "loss": 0.5497, "step": 9100 }, { "epoch": 1.16, "grad_norm": 0.7003710811321566, "learning_rate": 3.959062561029567e-06, "loss": 0.5037, "step": 9101 }, { "epoch": 1.16, "grad_norm": 0.6349705294957484, "learning_rate": 3.9580536001280965e-06, "loss": 0.4975, "step": 9102 }, { "epoch": 1.16, "grad_norm": 0.6906702390940155, "learning_rate": 3.957044683578953e-06, "loss": 0.4872, "step": 9103 }, { "epoch": 1.16, "grad_norm": 0.6341404581796939, "learning_rate": 3.9560358114250855e-06, "loss": 0.4845, "step": 9104 }, { "epoch": 1.16, "grad_norm": 0.7150037390349223, "learning_rate": 3.955026983709437e-06, "loss": 0.4792, "step": 9105 }, { "epoch": 1.16, "grad_norm": 0.7174431303440013, "learning_rate": 3.954018200474951e-06, "loss": 0.48, "step": 9106 }, { "epoch": 1.16, "grad_norm": 0.7808515385455057, "learning_rate": 3.953009461764568e-06, "loss": 0.5819, "step": 9107 }, { "epoch": 1.16, "grad_norm": 0.6942780320023652, "learning_rate": 3.952000767621224e-06, "loss": 0.4963, "step": 9108 }, { "epoch": 1.16, "grad_norm": 0.657704003477227, "learning_rate": 3.95099211808786e-06, "loss": 0.478, "step": 9109 }, { "epoch": 1.16, "grad_norm": 0.7712681903048891, "learning_rate": 3.94998351320741e-06, "loss": 0.4958, "step": 9110 }, { "epoch": 1.16, "grad_norm": 0.7358224316257641, "learning_rate": 3.948974953022805e-06, "loss": 0.5168, "step": 9111 }, { "epoch": 1.16, "grad_norm": 0.6010775462509673, "learning_rate": 3.94796643757698e-06, "loss": 0.4673, "step": 9112 }, { "epoch": 1.16, "grad_norm": 0.819647934147269, "learning_rate": 3.94695796691286e-06, "loss": 0.5547, "step": 9113 }, { "epoch": 1.16, "grad_norm": 0.7964932874161982, "learning_rate": 3.945949541073376e-06, "loss": 0.4886, "step": 9114 }, { "epoch": 1.16, "grad_norm": 0.602785597339483, "learning_rate": 3.94494116010145e-06, "loss": 0.4701, "step": 9115 }, { "epoch": 1.16, "grad_norm": 0.665245239858181, "learning_rate": 3.943932824040009e-06, "loss": 0.489, "step": 9116 }, { "epoch": 1.16, "grad_norm": 0.668834178179799, "learning_rate": 3.942924532931971e-06, "loss": 0.4795, "step": 9117 }, { "epoch": 1.16, "grad_norm": 0.5957240798870477, "learning_rate": 3.94191628682026e-06, "loss": 0.4317, "step": 9118 }, { "epoch": 1.16, "grad_norm": 0.5440271368903683, "learning_rate": 3.94090808574779e-06, "loss": 0.4569, "step": 9119 }, { "epoch": 1.16, "grad_norm": 0.6572196855398421, "learning_rate": 3.939899929757477e-06, "loss": 0.4942, "step": 9120 }, { "epoch": 1.16, "grad_norm": 0.9205188706946809, "learning_rate": 3.938891818892238e-06, "loss": 0.5233, "step": 9121 }, { "epoch": 1.16, "grad_norm": 0.6757955028486089, "learning_rate": 3.9378837531949834e-06, "loss": 0.4662, "step": 9122 }, { "epoch": 1.16, "grad_norm": 0.6469780203792238, "learning_rate": 3.9368757327086235e-06, "loss": 0.4821, "step": 9123 }, { "epoch": 1.16, "grad_norm": 0.7221028375024128, "learning_rate": 3.935867757476067e-06, "loss": 0.5425, "step": 9124 }, { "epoch": 1.16, "grad_norm": 0.8028123857059496, "learning_rate": 3.934859827540219e-06, "loss": 0.5813, "step": 9125 }, { "epoch": 1.16, "grad_norm": 2.0441537245520633, "learning_rate": 3.9338519429439844e-06, "loss": 0.5078, "step": 9126 }, { "epoch": 1.16, "grad_norm": 0.6024806031923067, "learning_rate": 3.932844103730266e-06, "loss": 0.4135, "step": 9127 }, { "epoch": 1.16, "grad_norm": 0.5841228906941894, "learning_rate": 3.931836309941964e-06, "loss": 0.4907, "step": 9128 }, { "epoch": 1.16, "grad_norm": 0.7006906670491828, "learning_rate": 3.930828561621977e-06, "loss": 0.509, "step": 9129 }, { "epoch": 1.16, "grad_norm": 0.6420572105852815, "learning_rate": 3.929820858813201e-06, "loss": 0.4455, "step": 9130 }, { "epoch": 1.16, "grad_norm": 0.6647095300568276, "learning_rate": 3.928813201558531e-06, "loss": 0.4785, "step": 9131 }, { "epoch": 1.16, "grad_norm": 0.6692811303006562, "learning_rate": 3.927805589900861e-06, "loss": 0.5052, "step": 9132 }, { "epoch": 1.16, "grad_norm": 0.7395503856659327, "learning_rate": 3.926798023883079e-06, "loss": 0.4945, "step": 9133 }, { "epoch": 1.16, "grad_norm": 0.7046937754212744, "learning_rate": 3.925790503548077e-06, "loss": 0.5408, "step": 9134 }, { "epoch": 1.16, "grad_norm": 0.7632893059144262, "learning_rate": 3.92478302893874e-06, "loss": 0.5102, "step": 9135 }, { "epoch": 1.16, "grad_norm": 0.6120191728587682, "learning_rate": 3.923775600097954e-06, "loss": 0.4171, "step": 9136 }, { "epoch": 1.16, "grad_norm": 0.6654767368305994, "learning_rate": 3.922768217068601e-06, "loss": 0.4044, "step": 9137 }, { "epoch": 1.16, "grad_norm": 0.5990582930444863, "learning_rate": 3.921760879893563e-06, "loss": 0.4307, "step": 9138 }, { "epoch": 1.16, "grad_norm": 0.6657303033587134, "learning_rate": 3.92075358861572e-06, "loss": 0.4622, "step": 9139 }, { "epoch": 1.16, "grad_norm": 2.0007860509307496, "learning_rate": 3.919746343277947e-06, "loss": 0.5107, "step": 9140 }, { "epoch": 1.16, "grad_norm": 0.5423788817410247, "learning_rate": 3.91873914392312e-06, "loss": 0.4407, "step": 9141 }, { "epoch": 1.16, "grad_norm": 0.5877952194569003, "learning_rate": 3.917731990594112e-06, "loss": 0.4764, "step": 9142 }, { "epoch": 1.16, "grad_norm": 0.8564332508298254, "learning_rate": 3.916724883333796e-06, "loss": 0.5384, "step": 9143 }, { "epoch": 1.16, "grad_norm": 0.6409153378132896, "learning_rate": 3.9157178221850395e-06, "loss": 0.4464, "step": 9144 }, { "epoch": 1.17, "grad_norm": 0.6126027797882431, "learning_rate": 3.914710807190709e-06, "loss": 0.4721, "step": 9145 }, { "epoch": 1.17, "grad_norm": 0.6639521049575524, "learning_rate": 3.913703838393673e-06, "loss": 0.4303, "step": 9146 }, { "epoch": 1.17, "grad_norm": 0.7090278906173495, "learning_rate": 3.912696915836794e-06, "loss": 0.4937, "step": 9147 }, { "epoch": 1.17, "grad_norm": 0.621386579863087, "learning_rate": 3.911690039562931e-06, "loss": 0.4277, "step": 9148 }, { "epoch": 1.17, "grad_norm": 0.5783861492710738, "learning_rate": 3.910683209614946e-06, "loss": 0.4439, "step": 9149 }, { "epoch": 1.17, "grad_norm": 0.6201388495073825, "learning_rate": 3.909676426035696e-06, "loss": 0.4661, "step": 9150 }, { "epoch": 1.17, "grad_norm": 0.6921068944146899, "learning_rate": 3.9086696888680365e-06, "loss": 0.5297, "step": 9151 }, { "epoch": 1.17, "grad_norm": 0.697164699611184, "learning_rate": 3.90766299815482e-06, "loss": 0.4584, "step": 9152 }, { "epoch": 1.17, "grad_norm": 0.8065126838874935, "learning_rate": 3.9066563539389e-06, "loss": 0.5355, "step": 9153 }, { "epoch": 1.17, "grad_norm": 0.6212464792366286, "learning_rate": 3.905649756263125e-06, "loss": 0.4656, "step": 9154 }, { "epoch": 1.17, "grad_norm": 0.5768977727785792, "learning_rate": 3.904643205170343e-06, "loss": 0.435, "step": 9155 }, { "epoch": 1.17, "grad_norm": 0.6472479220189596, "learning_rate": 3.903636700703399e-06, "loss": 0.4406, "step": 9156 }, { "epoch": 1.17, "grad_norm": 0.8617600728263312, "learning_rate": 3.902630242905138e-06, "loss": 0.5178, "step": 9157 }, { "epoch": 1.17, "grad_norm": 0.7154616592155875, "learning_rate": 3.901623831818398e-06, "loss": 0.5068, "step": 9158 }, { "epoch": 1.17, "grad_norm": 0.766981964880095, "learning_rate": 3.9006174674860245e-06, "loss": 0.4771, "step": 9159 }, { "epoch": 1.17, "grad_norm": 0.7299910472352982, "learning_rate": 3.899611149950851e-06, "loss": 0.4874, "step": 9160 }, { "epoch": 1.17, "grad_norm": 0.6244080099514171, "learning_rate": 3.898604879255715e-06, "loss": 0.5004, "step": 9161 }, { "epoch": 1.17, "grad_norm": 0.7549839214226711, "learning_rate": 3.89759865544345e-06, "loss": 0.5253, "step": 9162 }, { "epoch": 1.17, "grad_norm": 0.6385097459167149, "learning_rate": 3.896592478556888e-06, "loss": 0.4846, "step": 9163 }, { "epoch": 1.17, "grad_norm": 0.8405263514369358, "learning_rate": 3.895586348638857e-06, "loss": 0.4935, "step": 9164 }, { "epoch": 1.17, "grad_norm": 0.7399865673635089, "learning_rate": 3.894580265732187e-06, "loss": 0.538, "step": 9165 }, { "epoch": 1.17, "grad_norm": 0.6681394900829308, "learning_rate": 3.893574229879702e-06, "loss": 0.5062, "step": 9166 }, { "epoch": 1.17, "grad_norm": 0.6105976858829424, "learning_rate": 3.892568241124227e-06, "loss": 0.4575, "step": 9167 }, { "epoch": 1.17, "grad_norm": 0.5858101445608391, "learning_rate": 3.891562299508582e-06, "loss": 0.5049, "step": 9168 }, { "epoch": 1.17, "grad_norm": 0.6234820883859475, "learning_rate": 3.890556405075589e-06, "loss": 0.4336, "step": 9169 }, { "epoch": 1.17, "grad_norm": 0.7714491599082312, "learning_rate": 3.889550557868062e-06, "loss": 0.4954, "step": 9170 }, { "epoch": 1.17, "grad_norm": 0.7779044214780643, "learning_rate": 3.888544757928821e-06, "loss": 0.5294, "step": 9171 }, { "epoch": 1.17, "grad_norm": 0.7271296509389674, "learning_rate": 3.887539005300679e-06, "loss": 0.5461, "step": 9172 }, { "epoch": 1.17, "grad_norm": 0.7822856196753434, "learning_rate": 3.886533300026446e-06, "loss": 0.573, "step": 9173 }, { "epoch": 1.17, "grad_norm": 0.733223695507951, "learning_rate": 3.885527642148932e-06, "loss": 0.4985, "step": 9174 }, { "epoch": 1.17, "grad_norm": 0.7246966885432375, "learning_rate": 3.884522031710946e-06, "loss": 0.4717, "step": 9175 }, { "epoch": 1.17, "grad_norm": 0.6986339145254724, "learning_rate": 3.883516468755291e-06, "loss": 0.5033, "step": 9176 }, { "epoch": 1.17, "grad_norm": 0.6089393751958287, "learning_rate": 3.882510953324773e-06, "loss": 0.5221, "step": 9177 }, { "epoch": 1.17, "grad_norm": 2.3038668661326622, "learning_rate": 3.881505485462192e-06, "loss": 0.5175, "step": 9178 }, { "epoch": 1.17, "grad_norm": 0.6827360566959162, "learning_rate": 3.88050006521035e-06, "loss": 0.5045, "step": 9179 }, { "epoch": 1.17, "grad_norm": 0.5906429046719849, "learning_rate": 3.879494692612041e-06, "loss": 0.4377, "step": 9180 }, { "epoch": 1.17, "grad_norm": 0.7615171443245319, "learning_rate": 3.878489367710063e-06, "loss": 0.5454, "step": 9181 }, { "epoch": 1.17, "grad_norm": 0.7664159803596445, "learning_rate": 3.877484090547207e-06, "loss": 0.5827, "step": 9182 }, { "epoch": 1.17, "grad_norm": 0.6761907687636795, "learning_rate": 3.876478861166269e-06, "loss": 0.5063, "step": 9183 }, { "epoch": 1.17, "grad_norm": 0.7629195482273305, "learning_rate": 3.875473679610034e-06, "loss": 0.503, "step": 9184 }, { "epoch": 1.17, "grad_norm": 0.6107465754141232, "learning_rate": 3.874468545921292e-06, "loss": 0.4959, "step": 9185 }, { "epoch": 1.17, "grad_norm": 0.5943484056256512, "learning_rate": 3.873463460142827e-06, "loss": 0.476, "step": 9186 }, { "epoch": 1.17, "grad_norm": 15.456930780097185, "learning_rate": 3.872458422317422e-06, "loss": 0.5605, "step": 9187 }, { "epoch": 1.17, "grad_norm": 0.6893922390539317, "learning_rate": 3.871453432487859e-06, "loss": 0.5372, "step": 9188 }, { "epoch": 1.17, "grad_norm": 0.7219061276696814, "learning_rate": 3.870448490696918e-06, "loss": 0.5353, "step": 9189 }, { "epoch": 1.17, "grad_norm": 0.6049954437753662, "learning_rate": 3.869443596987374e-06, "loss": 0.4549, "step": 9190 }, { "epoch": 1.17, "grad_norm": 0.6344372661780289, "learning_rate": 3.868438751402005e-06, "loss": 0.5042, "step": 9191 }, { "epoch": 1.17, "grad_norm": 0.6941526922751737, "learning_rate": 3.867433953983582e-06, "loss": 0.5014, "step": 9192 }, { "epoch": 1.17, "grad_norm": 0.6881241446546059, "learning_rate": 3.8664292047748755e-06, "loss": 0.5136, "step": 9193 }, { "epoch": 1.17, "grad_norm": 0.6162241616900622, "learning_rate": 3.8654245038186556e-06, "loss": 0.4564, "step": 9194 }, { "epoch": 1.17, "grad_norm": 0.6233690103539943, "learning_rate": 3.864419851157688e-06, "loss": 0.4384, "step": 9195 }, { "epoch": 1.17, "grad_norm": 0.9418688162080785, "learning_rate": 3.86341524683474e-06, "loss": 0.5472, "step": 9196 }, { "epoch": 1.17, "grad_norm": 0.8691137996594228, "learning_rate": 3.862410690892572e-06, "loss": 0.5088, "step": 9197 }, { "epoch": 1.17, "grad_norm": 0.8002155652223986, "learning_rate": 3.8614061833739465e-06, "loss": 0.4869, "step": 9198 }, { "epoch": 1.17, "grad_norm": 0.7042408303696801, "learning_rate": 3.86040172432162e-06, "loss": 0.5247, "step": 9199 }, { "epoch": 1.17, "grad_norm": 0.5657596663045861, "learning_rate": 3.859397313778352e-06, "loss": 0.4336, "step": 9200 }, { "epoch": 1.17, "grad_norm": 0.6596114493968095, "learning_rate": 3.858392951786895e-06, "loss": 0.4478, "step": 9201 }, { "epoch": 1.17, "grad_norm": 0.6281678243658037, "learning_rate": 3.857388638390001e-06, "loss": 0.4427, "step": 9202 }, { "epoch": 1.17, "grad_norm": 0.6567881099577151, "learning_rate": 3.856384373630424e-06, "loss": 0.471, "step": 9203 }, { "epoch": 1.17, "grad_norm": 0.6881564327469285, "learning_rate": 3.855380157550907e-06, "loss": 0.495, "step": 9204 }, { "epoch": 1.17, "grad_norm": 0.7740229586344956, "learning_rate": 3.854375990194201e-06, "loss": 0.5614, "step": 9205 }, { "epoch": 1.17, "grad_norm": 0.7221591605665109, "learning_rate": 3.853371871603048e-06, "loss": 0.5433, "step": 9206 }, { "epoch": 1.17, "grad_norm": 0.6764043248639546, "learning_rate": 3.852367801820188e-06, "loss": 0.519, "step": 9207 }, { "epoch": 1.17, "grad_norm": 0.8623854662145037, "learning_rate": 3.851363780888365e-06, "loss": 0.5967, "step": 9208 }, { "epoch": 1.17, "grad_norm": 0.7071161215088375, "learning_rate": 3.8503598088503155e-06, "loss": 0.5319, "step": 9209 }, { "epoch": 1.17, "grad_norm": 0.7849210922790041, "learning_rate": 3.849355885748775e-06, "loss": 0.4994, "step": 9210 }, { "epoch": 1.17, "grad_norm": 0.5831460860868768, "learning_rate": 3.848352011626478e-06, "loss": 0.4645, "step": 9211 }, { "epoch": 1.17, "grad_norm": 0.5863534498201394, "learning_rate": 3.847348186526156e-06, "loss": 0.4421, "step": 9212 }, { "epoch": 1.17, "grad_norm": 0.5815739507435796, "learning_rate": 3.846344410490538e-06, "loss": 0.4289, "step": 9213 }, { "epoch": 1.17, "grad_norm": 0.5997127376286291, "learning_rate": 3.845340683562352e-06, "loss": 0.4683, "step": 9214 }, { "epoch": 1.17, "grad_norm": 0.5790066090762571, "learning_rate": 3.844337005784322e-06, "loss": 0.432, "step": 9215 }, { "epoch": 1.17, "grad_norm": 0.6239345958018846, "learning_rate": 3.843333377199173e-06, "loss": 0.4789, "step": 9216 }, { "epoch": 1.17, "grad_norm": 0.8485273250638531, "learning_rate": 3.8423297978496274e-06, "loss": 0.5108, "step": 9217 }, { "epoch": 1.17, "grad_norm": 0.5536674072142241, "learning_rate": 3.841326267778403e-06, "loss": 0.4159, "step": 9218 }, { "epoch": 1.17, "grad_norm": 0.6455557634131391, "learning_rate": 3.840322787028216e-06, "loss": 0.4017, "step": 9219 }, { "epoch": 1.17, "grad_norm": 0.617889907250975, "learning_rate": 3.83931935564178e-06, "loss": 0.4648, "step": 9220 }, { "epoch": 1.17, "grad_norm": 0.7682929414251215, "learning_rate": 3.838315973661812e-06, "loss": 0.5482, "step": 9221 }, { "epoch": 1.17, "grad_norm": 0.7356949883681463, "learning_rate": 3.83731264113102e-06, "loss": 0.5363, "step": 9222 }, { "epoch": 1.17, "grad_norm": 0.7679844367820574, "learning_rate": 3.836309358092115e-06, "loss": 0.5493, "step": 9223 }, { "epoch": 1.18, "grad_norm": 0.8391703098611941, "learning_rate": 3.835306124587801e-06, "loss": 0.5955, "step": 9224 }, { "epoch": 1.18, "grad_norm": 0.6816100992227841, "learning_rate": 3.834302940660784e-06, "loss": 0.5169, "step": 9225 }, { "epoch": 1.18, "grad_norm": 0.6352445804077987, "learning_rate": 3.8332998063537656e-06, "loss": 0.4862, "step": 9226 }, { "epoch": 1.18, "grad_norm": 0.7416147191657123, "learning_rate": 3.832296721709447e-06, "loss": 0.4999, "step": 9227 }, { "epoch": 1.18, "grad_norm": 0.6579608237460424, "learning_rate": 3.831293686770524e-06, "loss": 0.4468, "step": 9228 }, { "epoch": 1.18, "grad_norm": 0.6927788150118354, "learning_rate": 3.830290701579695e-06, "loss": 0.4558, "step": 9229 }, { "epoch": 1.18, "grad_norm": 0.7305368181712655, "learning_rate": 3.829287766179653e-06, "loss": 0.4848, "step": 9230 }, { "epoch": 1.18, "grad_norm": 0.809619839555621, "learning_rate": 3.82828488061309e-06, "loss": 0.4916, "step": 9231 }, { "epoch": 1.18, "grad_norm": 1.2811635489446969, "learning_rate": 3.827282044922692e-06, "loss": 0.4731, "step": 9232 }, { "epoch": 1.18, "grad_norm": 0.6003447374299552, "learning_rate": 3.826279259151153e-06, "loss": 0.4821, "step": 9233 }, { "epoch": 1.18, "grad_norm": 0.7247723980490272, "learning_rate": 3.8252765233411545e-06, "loss": 0.445, "step": 9234 }, { "epoch": 1.18, "grad_norm": 0.6432466770235389, "learning_rate": 3.824273837535381e-06, "loss": 0.4595, "step": 9235 }, { "epoch": 1.18, "grad_norm": 0.8148596041799379, "learning_rate": 3.8232712017765136e-06, "loss": 0.569, "step": 9236 }, { "epoch": 1.18, "grad_norm": 0.8379815591291666, "learning_rate": 3.822268616107231e-06, "loss": 0.4709, "step": 9237 }, { "epoch": 1.18, "grad_norm": 0.6596493905406737, "learning_rate": 3.821266080570209e-06, "loss": 0.453, "step": 9238 }, { "epoch": 1.18, "grad_norm": 0.7008662707900963, "learning_rate": 3.8202635952081235e-06, "loss": 0.4747, "step": 9239 }, { "epoch": 1.18, "grad_norm": 0.6988174692851142, "learning_rate": 3.8192611600636475e-06, "loss": 0.4747, "step": 9240 }, { "epoch": 1.18, "grad_norm": 0.7049989135272603, "learning_rate": 3.81825877517945e-06, "loss": 0.5187, "step": 9241 }, { "epoch": 1.18, "grad_norm": 0.6617713091278571, "learning_rate": 3.8172564405982e-06, "loss": 0.5134, "step": 9242 }, { "epoch": 1.18, "grad_norm": 0.6510452903445328, "learning_rate": 3.816254156362565e-06, "loss": 0.5014, "step": 9243 }, { "epoch": 1.18, "grad_norm": 0.7906560317435627, "learning_rate": 3.815251922515205e-06, "loss": 0.5015, "step": 9244 }, { "epoch": 1.18, "grad_norm": 0.8100131437901296, "learning_rate": 3.814249739098787e-06, "loss": 0.5174, "step": 9245 }, { "epoch": 1.18, "grad_norm": 0.5886019392354486, "learning_rate": 3.8132476061559683e-06, "loss": 0.4444, "step": 9246 }, { "epoch": 1.18, "grad_norm": 0.6982257396631043, "learning_rate": 3.8122455237294065e-06, "loss": 0.4943, "step": 9247 }, { "epoch": 1.18, "grad_norm": 0.7774025952607653, "learning_rate": 3.811243491861758e-06, "loss": 0.5106, "step": 9248 }, { "epoch": 1.18, "grad_norm": 0.8108360143504353, "learning_rate": 3.8102415105956746e-06, "loss": 0.5042, "step": 9249 }, { "epoch": 1.18, "grad_norm": 0.7335364714964374, "learning_rate": 3.8092395799738084e-06, "loss": 0.5216, "step": 9250 }, { "epoch": 1.18, "grad_norm": 0.7635895452982631, "learning_rate": 3.8082377000388083e-06, "loss": 0.447, "step": 9251 }, { "epoch": 1.18, "grad_norm": 0.7662973614194442, "learning_rate": 3.807235870833321e-06, "loss": 0.5446, "step": 9252 }, { "epoch": 1.18, "grad_norm": 0.7325856723178641, "learning_rate": 3.8062340923999906e-06, "loss": 0.5315, "step": 9253 }, { "epoch": 1.18, "grad_norm": 0.7968720456675714, "learning_rate": 3.8052323647814604e-06, "loss": 0.5503, "step": 9254 }, { "epoch": 1.18, "grad_norm": 0.7279809041346655, "learning_rate": 3.8042306880203706e-06, "loss": 0.4815, "step": 9255 }, { "epoch": 1.18, "grad_norm": 0.6143506890845476, "learning_rate": 3.803229062159359e-06, "loss": 0.4539, "step": 9256 }, { "epoch": 1.18, "grad_norm": 0.6321351488723453, "learning_rate": 3.8022274872410614e-06, "loss": 0.5123, "step": 9257 }, { "epoch": 1.18, "grad_norm": 0.7688371835938946, "learning_rate": 3.8012259633081133e-06, "loss": 0.5241, "step": 9258 }, { "epoch": 1.18, "grad_norm": 0.655250172919114, "learning_rate": 3.800224490403145e-06, "loss": 0.4987, "step": 9259 }, { "epoch": 1.18, "grad_norm": 0.7011900613464648, "learning_rate": 3.7992230685687845e-06, "loss": 0.5243, "step": 9260 }, { "epoch": 1.18, "grad_norm": 0.7145118081784876, "learning_rate": 3.798221697847663e-06, "loss": 0.4575, "step": 9261 }, { "epoch": 1.18, "grad_norm": 0.5398549303443795, "learning_rate": 3.7972203782824023e-06, "loss": 0.4333, "step": 9262 }, { "epoch": 1.18, "grad_norm": 0.8049330864013907, "learning_rate": 3.7962191099156277e-06, "loss": 0.4919, "step": 9263 }, { "epoch": 1.18, "grad_norm": 0.7253450753326646, "learning_rate": 3.795217892789958e-06, "loss": 0.5375, "step": 9264 }, { "epoch": 1.18, "grad_norm": 0.7160461422226329, "learning_rate": 3.7942167269480128e-06, "loss": 0.5535, "step": 9265 }, { "epoch": 1.18, "grad_norm": 0.7149180236707086, "learning_rate": 3.793215612432409e-06, "loss": 0.5485, "step": 9266 }, { "epoch": 1.18, "grad_norm": 0.7221360989074198, "learning_rate": 3.7922145492857597e-06, "loss": 0.5037, "step": 9267 }, { "epoch": 1.18, "grad_norm": 0.6515305820180698, "learning_rate": 3.7912135375506774e-06, "loss": 0.5286, "step": 9268 }, { "epoch": 1.18, "grad_norm": 0.7626597267477976, "learning_rate": 3.790212577269772e-06, "loss": 0.5919, "step": 9269 }, { "epoch": 1.18, "grad_norm": 0.6999679969840675, "learning_rate": 3.789211668485651e-06, "loss": 0.5081, "step": 9270 }, { "epoch": 1.18, "grad_norm": 0.6184696543695805, "learning_rate": 3.788210811240921e-06, "loss": 0.4932, "step": 9271 }, { "epoch": 1.18, "grad_norm": 0.8672299006600014, "learning_rate": 3.787210005578184e-06, "loss": 0.5358, "step": 9272 }, { "epoch": 1.18, "grad_norm": 1.1937749189019775, "learning_rate": 3.7862092515400416e-06, "loss": 0.5308, "step": 9273 }, { "epoch": 1.18, "grad_norm": 0.7309519734392892, "learning_rate": 3.7852085491690925e-06, "loss": 0.5436, "step": 9274 }, { "epoch": 1.18, "grad_norm": 1.2516075414710495, "learning_rate": 3.784207898507934e-06, "loss": 0.4916, "step": 9275 }, { "epoch": 1.18, "grad_norm": 0.6548281765815485, "learning_rate": 3.7832072995991597e-06, "loss": 0.4572, "step": 9276 }, { "epoch": 1.18, "grad_norm": 0.6568133177718134, "learning_rate": 3.782206752485363e-06, "loss": 0.44, "step": 9277 }, { "epoch": 1.18, "grad_norm": 0.580457937503196, "learning_rate": 3.781206257209133e-06, "loss": 0.4221, "step": 9278 }, { "epoch": 1.18, "grad_norm": 0.5952395096909264, "learning_rate": 3.7802058138130577e-06, "loss": 0.4786, "step": 9279 }, { "epoch": 1.18, "grad_norm": 0.7074837466214744, "learning_rate": 3.779205422339723e-06, "loss": 0.4749, "step": 9280 }, { "epoch": 1.18, "grad_norm": 0.7039139181821368, "learning_rate": 3.7782050828317123e-06, "loss": 0.471, "step": 9281 }, { "epoch": 1.18, "grad_norm": 0.8467539821181148, "learning_rate": 3.777204795331605e-06, "loss": 0.5201, "step": 9282 }, { "epoch": 1.18, "grad_norm": 0.6330137298086674, "learning_rate": 3.776204559881984e-06, "loss": 0.4785, "step": 9283 }, { "epoch": 1.18, "grad_norm": 0.7968895662099147, "learning_rate": 3.7752043765254248e-06, "loss": 0.5121, "step": 9284 }, { "epoch": 1.18, "grad_norm": 0.7022791635208689, "learning_rate": 3.7742042453045014e-06, "loss": 0.4924, "step": 9285 }, { "epoch": 1.18, "grad_norm": 0.779389833041283, "learning_rate": 3.773204166261785e-06, "loss": 0.4963, "step": 9286 }, { "epoch": 1.18, "grad_norm": 0.6689910998117216, "learning_rate": 3.772204139439848e-06, "loss": 0.5007, "step": 9287 }, { "epoch": 1.18, "grad_norm": 0.6497281707322332, "learning_rate": 3.771204164881257e-06, "loss": 0.5289, "step": 9288 }, { "epoch": 1.18, "grad_norm": 0.7494177434850453, "learning_rate": 3.7702042426285783e-06, "loss": 0.5086, "step": 9289 }, { "epoch": 1.18, "grad_norm": 0.5489349441227107, "learning_rate": 3.769204372724375e-06, "loss": 0.4177, "step": 9290 }, { "epoch": 1.18, "grad_norm": 0.7714326818527153, "learning_rate": 3.7682045552112084e-06, "loss": 0.4482, "step": 9291 }, { "epoch": 1.18, "grad_norm": 1.1313778681595092, "learning_rate": 3.7672047901316377e-06, "loss": 0.5338, "step": 9292 }, { "epoch": 1.18, "grad_norm": 0.7377633912869004, "learning_rate": 3.766205077528219e-06, "loss": 0.5001, "step": 9293 }, { "epoch": 1.18, "grad_norm": 0.7411550229133614, "learning_rate": 3.7652054174435065e-06, "loss": 0.5159, "step": 9294 }, { "epoch": 1.18, "grad_norm": 0.9335382124106479, "learning_rate": 3.7642058099200553e-06, "loss": 0.5174, "step": 9295 }, { "epoch": 1.18, "grad_norm": 0.6597320538383452, "learning_rate": 3.7632062550004135e-06, "loss": 0.4275, "step": 9296 }, { "epoch": 1.18, "grad_norm": 0.6055237739251296, "learning_rate": 3.7622067527271288e-06, "loss": 0.4492, "step": 9297 }, { "epoch": 1.18, "grad_norm": 0.8273284377787128, "learning_rate": 3.7612073031427484e-06, "loss": 0.5092, "step": 9298 }, { "epoch": 1.18, "grad_norm": 0.6844810617536631, "learning_rate": 3.7602079062898133e-06, "loss": 0.5355, "step": 9299 }, { "epoch": 1.18, "grad_norm": 0.8157952071011444, "learning_rate": 3.7592085622108666e-06, "loss": 0.501, "step": 9300 }, { "epoch": 1.18, "grad_norm": 0.5685104765345936, "learning_rate": 3.758209270948446e-06, "loss": 0.4738, "step": 9301 }, { "epoch": 1.19, "grad_norm": 0.6627901912611, "learning_rate": 3.7572100325450883e-06, "loss": 0.5089, "step": 9302 }, { "epoch": 1.19, "grad_norm": 0.7594495353184083, "learning_rate": 3.756210847043329e-06, "loss": 0.5367, "step": 9303 }, { "epoch": 1.19, "grad_norm": 0.7573227987303697, "learning_rate": 3.755211714485698e-06, "loss": 0.5125, "step": 9304 }, { "epoch": 1.19, "grad_norm": 0.8435916604362016, "learning_rate": 3.7542126349147277e-06, "loss": 0.5543, "step": 9305 }, { "epoch": 1.19, "grad_norm": 0.5922881331706207, "learning_rate": 3.753213608372942e-06, "loss": 0.4347, "step": 9306 }, { "epoch": 1.19, "grad_norm": 0.8723259026302791, "learning_rate": 3.752214634902871e-06, "loss": 0.4632, "step": 9307 }, { "epoch": 1.19, "grad_norm": 1.1746982021610626, "learning_rate": 3.751215714547036e-06, "loss": 0.5416, "step": 9308 }, { "epoch": 1.19, "grad_norm": 0.7410483229901984, "learning_rate": 3.7502168473479572e-06, "loss": 0.4885, "step": 9309 }, { "epoch": 1.19, "grad_norm": 0.6948772900516106, "learning_rate": 3.749218033348153e-06, "loss": 0.4579, "step": 9310 }, { "epoch": 1.19, "grad_norm": 0.9095252001287359, "learning_rate": 3.748219272590141e-06, "loss": 0.5086, "step": 9311 }, { "epoch": 1.19, "grad_norm": 0.6682306627158009, "learning_rate": 3.747220565116434e-06, "loss": 0.4828, "step": 9312 }, { "epoch": 1.19, "grad_norm": 0.6265989931863598, "learning_rate": 3.7462219109695443e-06, "loss": 0.4547, "step": 9313 }, { "epoch": 1.19, "grad_norm": 0.6501863023514232, "learning_rate": 3.7452233101919815e-06, "loss": 0.5018, "step": 9314 }, { "epoch": 1.19, "grad_norm": 0.6557278141796754, "learning_rate": 3.744224762826253e-06, "loss": 0.4884, "step": 9315 }, { "epoch": 1.19, "grad_norm": 0.5893417255077114, "learning_rate": 3.7432262689148625e-06, "loss": 0.4312, "step": 9316 }, { "epoch": 1.19, "grad_norm": 0.5455050742315564, "learning_rate": 3.7422278285003145e-06, "loss": 0.4005, "step": 9317 }, { "epoch": 1.19, "grad_norm": 0.6535238794781585, "learning_rate": 3.7412294416251084e-06, "loss": 0.4468, "step": 9318 }, { "epoch": 1.19, "grad_norm": 0.6805247278074009, "learning_rate": 3.7402311083317413e-06, "loss": 0.4741, "step": 9319 }, { "epoch": 1.19, "grad_norm": 0.8638228911588726, "learning_rate": 3.739232828662711e-06, "loss": 0.563, "step": 9320 }, { "epoch": 1.19, "grad_norm": 0.8193523934847567, "learning_rate": 3.7382346026605108e-06, "loss": 0.5108, "step": 9321 }, { "epoch": 1.19, "grad_norm": 0.8829775620531921, "learning_rate": 3.73723643036763e-06, "loss": 0.4882, "step": 9322 }, { "epoch": 1.19, "grad_norm": 0.5870229389234177, "learning_rate": 3.736238311826561e-06, "loss": 0.4887, "step": 9323 }, { "epoch": 1.19, "grad_norm": 0.6344721944487332, "learning_rate": 3.735240247079788e-06, "loss": 0.505, "step": 9324 }, { "epoch": 1.19, "grad_norm": 0.6926947312885636, "learning_rate": 3.7342422361697967e-06, "loss": 0.5369, "step": 9325 }, { "epoch": 1.19, "grad_norm": 0.7269810014722693, "learning_rate": 3.733244279139068e-06, "loss": 0.4839, "step": 9326 }, { "epoch": 1.19, "grad_norm": 0.610876673986871, "learning_rate": 3.732246376030083e-06, "loss": 0.5129, "step": 9327 }, { "epoch": 1.19, "grad_norm": 0.7641989057294315, "learning_rate": 3.7312485268853184e-06, "loss": 0.5836, "step": 9328 }, { "epoch": 1.19, "grad_norm": 0.6870809711148789, "learning_rate": 3.7302507317472493e-06, "loss": 0.4601, "step": 9329 }, { "epoch": 1.19, "grad_norm": 0.6202871951475808, "learning_rate": 3.7292529906583487e-06, "loss": 0.5205, "step": 9330 }, { "epoch": 1.19, "grad_norm": 0.7142741815292583, "learning_rate": 3.728255303661087e-06, "loss": 0.5074, "step": 9331 }, { "epoch": 1.19, "grad_norm": 0.5518207667321977, "learning_rate": 3.7272576707979345e-06, "loss": 0.4166, "step": 9332 }, { "epoch": 1.19, "grad_norm": 0.6242109176807179, "learning_rate": 3.7262600921113555e-06, "loss": 0.5183, "step": 9333 }, { "epoch": 1.19, "grad_norm": 0.7938759588234392, "learning_rate": 3.7252625676438136e-06, "loss": 0.5432, "step": 9334 }, { "epoch": 1.19, "grad_norm": 0.7667275698528835, "learning_rate": 3.7242650974377716e-06, "loss": 0.5116, "step": 9335 }, { "epoch": 1.19, "grad_norm": 0.651678924316612, "learning_rate": 3.7232676815356873e-06, "loss": 0.4523, "step": 9336 }, { "epoch": 1.19, "grad_norm": 0.8319730542316909, "learning_rate": 3.722270319980018e-06, "loss": 0.4918, "step": 9337 }, { "epoch": 1.19, "grad_norm": 0.6840768630249385, "learning_rate": 3.721273012813218e-06, "loss": 0.525, "step": 9338 }, { "epoch": 1.19, "grad_norm": 0.6739512175000334, "learning_rate": 3.72027576007774e-06, "loss": 0.4692, "step": 9339 }, { "epoch": 1.19, "grad_norm": 0.6622551503471159, "learning_rate": 3.7192785618160334e-06, "loss": 0.4986, "step": 9340 }, { "epoch": 1.19, "grad_norm": 0.7407529112361289, "learning_rate": 3.7182814180705458e-06, "loss": 0.5132, "step": 9341 }, { "epoch": 1.19, "grad_norm": 0.6822790479885465, "learning_rate": 3.7172843288837225e-06, "loss": 0.4565, "step": 9342 }, { "epoch": 1.19, "grad_norm": 0.6566713989972134, "learning_rate": 3.7162872942980054e-06, "loss": 0.4618, "step": 9343 }, { "epoch": 1.19, "grad_norm": 0.6037750803989895, "learning_rate": 3.7152903143558348e-06, "loss": 0.4513, "step": 9344 }, { "epoch": 1.19, "grad_norm": 0.7822450616837964, "learning_rate": 3.7142933890996524e-06, "loss": 0.4982, "step": 9345 }, { "epoch": 1.19, "grad_norm": 1.129460569369605, "learning_rate": 3.7132965185718916e-06, "loss": 0.5558, "step": 9346 }, { "epoch": 1.19, "grad_norm": 0.628171536754497, "learning_rate": 3.7122997028149867e-06, "loss": 0.4233, "step": 9347 }, { "epoch": 1.19, "grad_norm": 0.6393485393375593, "learning_rate": 3.7113029418713677e-06, "loss": 0.4723, "step": 9348 }, { "epoch": 1.19, "grad_norm": 0.5747234858157932, "learning_rate": 3.710306235783465e-06, "loss": 0.4561, "step": 9349 }, { "epoch": 1.19, "grad_norm": 0.59850375244888, "learning_rate": 3.7093095845937043e-06, "loss": 0.4571, "step": 9350 }, { "epoch": 1.19, "grad_norm": 0.7406742826051419, "learning_rate": 3.70831298834451e-06, "loss": 0.4595, "step": 9351 }, { "epoch": 1.19, "grad_norm": 0.8151994076648557, "learning_rate": 3.7073164470783053e-06, "loss": 0.49, "step": 9352 }, { "epoch": 1.19, "grad_norm": 0.6471894018644406, "learning_rate": 3.706319960837507e-06, "loss": 0.4833, "step": 9353 }, { "epoch": 1.19, "grad_norm": 0.7934390458550817, "learning_rate": 3.705323529664535e-06, "loss": 0.5053, "step": 9354 }, { "epoch": 1.19, "grad_norm": 0.6241388647738333, "learning_rate": 3.7043271536018033e-06, "loss": 0.4895, "step": 9355 }, { "epoch": 1.19, "grad_norm": 0.6094969019526522, "learning_rate": 3.7033308326917216e-06, "loss": 0.5041, "step": 9356 }, { "epoch": 1.19, "grad_norm": 0.6697576398262278, "learning_rate": 3.7023345669767047e-06, "loss": 0.468, "step": 9357 }, { "epoch": 1.19, "grad_norm": 0.5401047491143914, "learning_rate": 3.701338356499159e-06, "loss": 0.4228, "step": 9358 }, { "epoch": 1.19, "grad_norm": 0.583904784802937, "learning_rate": 3.7003422013014887e-06, "loss": 0.4863, "step": 9359 }, { "epoch": 1.19, "grad_norm": 0.9277342719980841, "learning_rate": 3.699346101426099e-06, "loss": 0.53, "step": 9360 }, { "epoch": 1.19, "grad_norm": 0.7715251346295013, "learning_rate": 3.698350056915389e-06, "loss": 0.5672, "step": 9361 }, { "epoch": 1.19, "grad_norm": 1.4160447402707952, "learning_rate": 3.6973540678117577e-06, "loss": 0.5241, "step": 9362 }, { "epoch": 1.19, "grad_norm": 0.5779429888948027, "learning_rate": 3.6963581341576005e-06, "loss": 0.4098, "step": 9363 }, { "epoch": 1.19, "grad_norm": 0.6637529487488397, "learning_rate": 3.695362255995312e-06, "loss": 0.5328, "step": 9364 }, { "epoch": 1.19, "grad_norm": 0.7283515188383624, "learning_rate": 3.6943664333672835e-06, "loss": 0.5162, "step": 9365 }, { "epoch": 1.19, "grad_norm": 0.6114779598947504, "learning_rate": 3.693370666315904e-06, "loss": 0.4714, "step": 9366 }, { "epoch": 1.19, "grad_norm": 0.6264892646274346, "learning_rate": 3.692374954883559e-06, "loss": 0.4412, "step": 9367 }, { "epoch": 1.19, "grad_norm": 0.5449127386935234, "learning_rate": 3.691379299112632e-06, "loss": 0.4392, "step": 9368 }, { "epoch": 1.19, "grad_norm": 0.6486514898573553, "learning_rate": 3.6903836990455087e-06, "loss": 0.449, "step": 9369 }, { "epoch": 1.19, "grad_norm": 0.7254549508570198, "learning_rate": 3.6893881547245658e-06, "loss": 0.4963, "step": 9370 }, { "epoch": 1.19, "grad_norm": 0.7918417743413897, "learning_rate": 3.688392666192182e-06, "loss": 0.4872, "step": 9371 }, { "epoch": 1.19, "grad_norm": 0.5722716888119294, "learning_rate": 3.6873972334907303e-06, "loss": 0.4483, "step": 9372 }, { "epoch": 1.19, "grad_norm": 0.6883569104658412, "learning_rate": 3.686401856662584e-06, "loss": 0.4523, "step": 9373 }, { "epoch": 1.19, "grad_norm": 0.6141976950426694, "learning_rate": 3.6854065357501133e-06, "loss": 0.4703, "step": 9374 }, { "epoch": 1.19, "grad_norm": 0.7885949112541654, "learning_rate": 3.6844112707956852e-06, "loss": 0.5138, "step": 9375 }, { "epoch": 1.19, "grad_norm": 1.5029589723466967, "learning_rate": 3.683416061841665e-06, "loss": 0.5571, "step": 9376 }, { "epoch": 1.19, "grad_norm": 0.6642345677986007, "learning_rate": 3.6824209089304163e-06, "loss": 0.5051, "step": 9377 }, { "epoch": 1.19, "grad_norm": 0.6578556018806611, "learning_rate": 3.6814258121042995e-06, "loss": 0.5162, "step": 9378 }, { "epoch": 1.19, "grad_norm": 0.6995339114569948, "learning_rate": 3.680430771405672e-06, "loss": 0.4896, "step": 9379 }, { "epoch": 1.19, "grad_norm": 0.5122522184510796, "learning_rate": 3.6794357868768895e-06, "loss": 0.4089, "step": 9380 }, { "epoch": 1.2, "grad_norm": 0.5171705227254032, "learning_rate": 3.6784408585603047e-06, "loss": 0.4283, "step": 9381 }, { "epoch": 1.2, "grad_norm": 0.578098014850233, "learning_rate": 3.677445986498271e-06, "loss": 0.4353, "step": 9382 }, { "epoch": 1.2, "grad_norm": 0.6636459750018662, "learning_rate": 3.6764511707331354e-06, "loss": 0.4648, "step": 9383 }, { "epoch": 1.2, "grad_norm": 0.7143341456500879, "learning_rate": 3.6754564113072423e-06, "loss": 0.541, "step": 9384 }, { "epoch": 1.2, "grad_norm": 0.623908847572535, "learning_rate": 3.6744617082629387e-06, "loss": 0.4844, "step": 9385 }, { "epoch": 1.2, "grad_norm": 0.7033864445488113, "learning_rate": 3.6734670616425638e-06, "loss": 0.551, "step": 9386 }, { "epoch": 1.2, "grad_norm": 0.7838962325925771, "learning_rate": 3.6724724714884574e-06, "loss": 0.5106, "step": 9387 }, { "epoch": 1.2, "grad_norm": 0.6528243369407437, "learning_rate": 3.6714779378429556e-06, "loss": 0.4701, "step": 9388 }, { "epoch": 1.2, "grad_norm": 0.659762743757758, "learning_rate": 3.670483460748393e-06, "loss": 0.4813, "step": 9389 }, { "epoch": 1.2, "grad_norm": 0.7722851731699196, "learning_rate": 3.6694890402471005e-06, "loss": 0.5959, "step": 9390 }, { "epoch": 1.2, "grad_norm": 0.8287668910847963, "learning_rate": 3.6684946763814078e-06, "loss": 0.5985, "step": 9391 }, { "epoch": 1.2, "grad_norm": 0.7381844742000775, "learning_rate": 3.6675003691936423e-06, "loss": 0.5472, "step": 9392 }, { "epoch": 1.2, "grad_norm": 0.7228108698755362, "learning_rate": 3.666506118726127e-06, "loss": 0.4872, "step": 9393 }, { "epoch": 1.2, "grad_norm": 0.6189659019990906, "learning_rate": 3.665511925021186e-06, "loss": 0.5008, "step": 9394 }, { "epoch": 1.2, "grad_norm": 0.6769923148269256, "learning_rate": 3.6645177881211375e-06, "loss": 0.476, "step": 9395 }, { "epoch": 1.2, "grad_norm": 0.7445779339363285, "learning_rate": 3.6635237080682994e-06, "loss": 0.4593, "step": 9396 }, { "epoch": 1.2, "grad_norm": 0.5909218732290121, "learning_rate": 3.6625296849049863e-06, "loss": 0.4433, "step": 9397 }, { "epoch": 1.2, "grad_norm": 0.7741790992713794, "learning_rate": 3.661535718673511e-06, "loss": 0.5006, "step": 9398 }, { "epoch": 1.2, "grad_norm": 0.6337812616324129, "learning_rate": 3.660541809416182e-06, "loss": 0.527, "step": 9399 }, { "epoch": 1.2, "grad_norm": 0.8041060394041263, "learning_rate": 3.659547957175309e-06, "loss": 0.4815, "step": 9400 }, { "epoch": 1.2, "grad_norm": 0.6972959503201985, "learning_rate": 3.6585541619931953e-06, "loss": 0.5485, "step": 9401 }, { "epoch": 1.2, "grad_norm": 0.5640436797815905, "learning_rate": 3.6575604239121444e-06, "loss": 0.4168, "step": 9402 }, { "epoch": 1.2, "grad_norm": 0.8431772662329683, "learning_rate": 3.6565667429744565e-06, "loss": 0.4642, "step": 9403 }, { "epoch": 1.2, "grad_norm": 0.787329924089397, "learning_rate": 3.6555731192224294e-06, "loss": 0.5255, "step": 9404 }, { "epoch": 1.2, "grad_norm": 0.7807554087944258, "learning_rate": 3.6545795526983584e-06, "loss": 0.562, "step": 9405 }, { "epoch": 1.2, "grad_norm": 0.5846642178958515, "learning_rate": 3.6535860434445343e-06, "loss": 0.462, "step": 9406 }, { "epoch": 1.2, "grad_norm": 0.5956953735044148, "learning_rate": 3.652592591503252e-06, "loss": 0.5001, "step": 9407 }, { "epoch": 1.2, "grad_norm": 0.6812276395809893, "learning_rate": 3.6515991969167975e-06, "loss": 0.4857, "step": 9408 }, { "epoch": 1.2, "grad_norm": 0.6365016702927165, "learning_rate": 3.6506058597274556e-06, "loss": 0.4019, "step": 9409 }, { "epoch": 1.2, "grad_norm": 0.5736335716129107, "learning_rate": 3.6496125799775113e-06, "loss": 0.4804, "step": 9410 }, { "epoch": 1.2, "grad_norm": 0.7305932920688418, "learning_rate": 3.648619357709243e-06, "loss": 0.4882, "step": 9411 }, { "epoch": 1.2, "grad_norm": 0.6085317276675052, "learning_rate": 3.6476261929649316e-06, "loss": 0.4291, "step": 9412 }, { "epoch": 1.2, "grad_norm": 0.6984835121280041, "learning_rate": 3.646633085786851e-06, "loss": 0.508, "step": 9413 }, { "epoch": 1.2, "grad_norm": 0.8424748932724646, "learning_rate": 3.6456400362172752e-06, "loss": 0.5327, "step": 9414 }, { "epoch": 1.2, "grad_norm": 0.7425466890467683, "learning_rate": 3.644647044298475e-06, "loss": 0.5255, "step": 9415 }, { "epoch": 1.2, "grad_norm": 0.7057164369499038, "learning_rate": 3.643654110072719e-06, "loss": 0.5572, "step": 9416 }, { "epoch": 1.2, "grad_norm": 0.6238592867457389, "learning_rate": 3.6426612335822735e-06, "loss": 0.4244, "step": 9417 }, { "epoch": 1.2, "grad_norm": 0.6145404264522862, "learning_rate": 3.641668414869399e-06, "loss": 0.4653, "step": 9418 }, { "epoch": 1.2, "grad_norm": 0.7842686843129191, "learning_rate": 3.6406756539763633e-06, "loss": 0.5227, "step": 9419 }, { "epoch": 1.2, "grad_norm": 0.8000686490823119, "learning_rate": 3.6396829509454203e-06, "loss": 0.4634, "step": 9420 }, { "epoch": 1.2, "grad_norm": 0.6307034047154679, "learning_rate": 3.638690305818826e-06, "loss": 0.4187, "step": 9421 }, { "epoch": 1.2, "grad_norm": 0.6217178830330693, "learning_rate": 3.637697718638837e-06, "loss": 0.4792, "step": 9422 }, { "epoch": 1.2, "grad_norm": 0.7085604471968104, "learning_rate": 3.6367051894477017e-06, "loss": 0.4821, "step": 9423 }, { "epoch": 1.2, "grad_norm": 0.624567134615195, "learning_rate": 3.6357127182876705e-06, "loss": 0.4685, "step": 9424 }, { "epoch": 1.2, "grad_norm": 0.583034659519317, "learning_rate": 3.6347203052009895e-06, "loss": 0.4229, "step": 9425 }, { "epoch": 1.2, "grad_norm": 0.5609245545654077, "learning_rate": 3.6337279502299017e-06, "loss": 0.4346, "step": 9426 }, { "epoch": 1.2, "grad_norm": 0.6057397309858307, "learning_rate": 3.632735653416649e-06, "loss": 0.4746, "step": 9427 }, { "epoch": 1.2, "grad_norm": 0.7008415882385254, "learning_rate": 3.6317434148034714e-06, "loss": 0.4976, "step": 9428 }, { "epoch": 1.2, "grad_norm": 0.600971276732898, "learning_rate": 3.6307512344326034e-06, "loss": 0.4568, "step": 9429 }, { "epoch": 1.2, "grad_norm": 0.6374964222218062, "learning_rate": 3.629759112346278e-06, "loss": 0.3921, "step": 9430 }, { "epoch": 1.2, "grad_norm": 0.6207756351778158, "learning_rate": 3.6287670485867306e-06, "loss": 0.4926, "step": 9431 }, { "epoch": 1.2, "grad_norm": 0.854422474203202, "learning_rate": 3.6277750431961877e-06, "loss": 0.5187, "step": 9432 }, { "epoch": 1.2, "grad_norm": 0.9067806553141458, "learning_rate": 3.626783096216877e-06, "loss": 0.5027, "step": 9433 }, { "epoch": 1.2, "grad_norm": 0.7618917669033753, "learning_rate": 3.6257912076910214e-06, "loss": 0.5116, "step": 9434 }, { "epoch": 1.2, "grad_norm": 0.7451529824388118, "learning_rate": 3.6247993776608426e-06, "loss": 0.4833, "step": 9435 }, { "epoch": 1.2, "grad_norm": 0.5961462216264772, "learning_rate": 3.6238076061685593e-06, "loss": 0.4642, "step": 9436 }, { "epoch": 1.2, "grad_norm": 0.7265659719787936, "learning_rate": 3.6228158932563895e-06, "loss": 0.5014, "step": 9437 }, { "epoch": 1.2, "grad_norm": 0.7626185257371304, "learning_rate": 3.6218242389665457e-06, "loss": 0.5255, "step": 9438 }, { "epoch": 1.2, "grad_norm": 0.8052022781133142, "learning_rate": 3.62083264334124e-06, "loss": 0.5387, "step": 9439 }, { "epoch": 1.2, "grad_norm": 0.7209810536499134, "learning_rate": 3.619841106422682e-06, "loss": 0.5311, "step": 9440 }, { "epoch": 1.2, "grad_norm": 0.7741278287168436, "learning_rate": 3.6188496282530774e-06, "loss": 0.481, "step": 9441 }, { "epoch": 1.2, "grad_norm": 0.7068216646728024, "learning_rate": 3.6178582088746307e-06, "loss": 0.4893, "step": 9442 }, { "epoch": 1.2, "grad_norm": 0.8401408212994929, "learning_rate": 3.616866848329542e-06, "loss": 0.4868, "step": 9443 }, { "epoch": 1.2, "grad_norm": 0.7424489850453143, "learning_rate": 3.615875546660013e-06, "loss": 0.4622, "step": 9444 }, { "epoch": 1.2, "grad_norm": 0.5707105923105024, "learning_rate": 3.6148843039082394e-06, "loss": 0.4654, "step": 9445 }, { "epoch": 1.2, "grad_norm": 0.5480934382977639, "learning_rate": 3.613893120116413e-06, "loss": 0.4318, "step": 9446 }, { "epoch": 1.2, "grad_norm": 0.6719126192826061, "learning_rate": 3.6129019953267285e-06, "loss": 0.4529, "step": 9447 }, { "epoch": 1.2, "grad_norm": 0.577864508267278, "learning_rate": 3.611910929581373e-06, "loss": 0.4608, "step": 9448 }, { "epoch": 1.2, "grad_norm": 0.7205605247021227, "learning_rate": 3.6109199229225346e-06, "loss": 0.4669, "step": 9449 }, { "epoch": 1.2, "grad_norm": 0.9227561583055726, "learning_rate": 3.609928975392395e-06, "loss": 0.5241, "step": 9450 }, { "epoch": 1.2, "grad_norm": 0.8264774168956006, "learning_rate": 3.6089380870331377e-06, "loss": 0.5459, "step": 9451 }, { "epoch": 1.2, "grad_norm": 0.6348407077098837, "learning_rate": 3.6079472578869405e-06, "loss": 0.4165, "step": 9452 }, { "epoch": 1.2, "grad_norm": 0.6035479530103958, "learning_rate": 3.6069564879959805e-06, "loss": 0.4698, "step": 9453 }, { "epoch": 1.2, "grad_norm": 1.0151899137145066, "learning_rate": 3.605965777402431e-06, "loss": 0.5063, "step": 9454 }, { "epoch": 1.2, "grad_norm": 0.7272700147929374, "learning_rate": 3.6049751261484624e-06, "loss": 0.5454, "step": 9455 }, { "epoch": 1.2, "grad_norm": 0.8989338736016839, "learning_rate": 3.6039845342762464e-06, "loss": 0.5116, "step": 9456 }, { "epoch": 1.2, "grad_norm": 0.6306512603646491, "learning_rate": 3.6029940018279474e-06, "loss": 0.4634, "step": 9457 }, { "epoch": 1.2, "grad_norm": 0.6193870711223556, "learning_rate": 3.60200352884573e-06, "loss": 0.5318, "step": 9458 }, { "epoch": 1.21, "grad_norm": 0.5828212196482788, "learning_rate": 3.6010131153717544e-06, "loss": 0.4574, "step": 9459 }, { "epoch": 1.21, "grad_norm": 0.5727501490341228, "learning_rate": 3.6000227614481797e-06, "loss": 0.5265, "step": 9460 }, { "epoch": 1.21, "grad_norm": 0.678357157869081, "learning_rate": 3.599032467117163e-06, "loss": 0.5516, "step": 9461 }, { "epoch": 1.21, "grad_norm": 0.5644482035468292, "learning_rate": 3.5980422324208576e-06, "loss": 0.4234, "step": 9462 }, { "epoch": 1.21, "grad_norm": 0.6079344633391223, "learning_rate": 3.597052057401414e-06, "loss": 0.4265, "step": 9463 }, { "epoch": 1.21, "grad_norm": 0.5825564950202775, "learning_rate": 3.596061942100981e-06, "loss": 0.4027, "step": 9464 }, { "epoch": 1.21, "grad_norm": 0.6088844888324194, "learning_rate": 3.5950718865617052e-06, "loss": 0.4967, "step": 9465 }, { "epoch": 1.21, "grad_norm": 0.6896780299535039, "learning_rate": 3.594081890825729e-06, "loss": 0.4752, "step": 9466 }, { "epoch": 1.21, "grad_norm": 0.7094657172600143, "learning_rate": 3.593091954935194e-06, "loss": 0.5189, "step": 9467 }, { "epoch": 1.21, "grad_norm": 0.7255489766048862, "learning_rate": 3.592102078932237e-06, "loss": 0.6105, "step": 9468 }, { "epoch": 1.21, "grad_norm": 0.5335851202317338, "learning_rate": 3.5911122628589977e-06, "loss": 0.4162, "step": 9469 }, { "epoch": 1.21, "grad_norm": 0.958267793174056, "learning_rate": 3.5901225067576073e-06, "loss": 0.469, "step": 9470 }, { "epoch": 1.21, "grad_norm": 0.8552163739072568, "learning_rate": 3.589132810670196e-06, "loss": 0.5468, "step": 9471 }, { "epoch": 1.21, "grad_norm": 1.1334336176113844, "learning_rate": 3.588143174638893e-06, "loss": 0.5105, "step": 9472 }, { "epoch": 1.21, "grad_norm": 0.6796844741501856, "learning_rate": 3.5871535987058226e-06, "loss": 0.4622, "step": 9473 }, { "epoch": 1.21, "grad_norm": 0.6019998325886428, "learning_rate": 3.5861640829131096e-06, "loss": 0.4492, "step": 9474 }, { "epoch": 1.21, "grad_norm": 0.6671691601084976, "learning_rate": 3.585174627302873e-06, "loss": 0.4612, "step": 9475 }, { "epoch": 1.21, "grad_norm": 0.6137077765714484, "learning_rate": 3.584185231917231e-06, "loss": 0.4189, "step": 9476 }, { "epoch": 1.21, "grad_norm": 0.6408745021382739, "learning_rate": 3.5831958967983004e-06, "loss": 0.4773, "step": 9477 }, { "epoch": 1.21, "grad_norm": 0.9151327707390334, "learning_rate": 3.5822066219881922e-06, "loss": 0.5135, "step": 9478 }, { "epoch": 1.21, "grad_norm": 0.7123509014075077, "learning_rate": 3.5812174075290185e-06, "loss": 0.4742, "step": 9479 }, { "epoch": 1.21, "grad_norm": 0.6151227884910276, "learning_rate": 3.580228253462883e-06, "loss": 0.4843, "step": 9480 }, { "epoch": 1.21, "grad_norm": 0.6466972396294765, "learning_rate": 3.579239159831896e-06, "loss": 0.494, "step": 9481 }, { "epoch": 1.21, "grad_norm": 0.5525207693083822, "learning_rate": 3.5782501266781577e-06, "loss": 0.4292, "step": 9482 }, { "epoch": 1.21, "grad_norm": 0.7272640659632693, "learning_rate": 3.5772611540437687e-06, "loss": 0.476, "step": 9483 }, { "epoch": 1.21, "grad_norm": 0.6582388715087351, "learning_rate": 3.5762722419708255e-06, "loss": 0.4672, "step": 9484 }, { "epoch": 1.21, "grad_norm": 1.0332531210569487, "learning_rate": 3.575283390501424e-06, "loss": 0.5439, "step": 9485 }, { "epoch": 1.21, "grad_norm": 0.7006529815804571, "learning_rate": 3.574294599677656e-06, "loss": 0.5591, "step": 9486 }, { "epoch": 1.21, "grad_norm": 0.9239951531892011, "learning_rate": 3.57330586954161e-06, "loss": 0.5313, "step": 9487 }, { "epoch": 1.21, "grad_norm": 0.6797135075251783, "learning_rate": 3.5723172001353747e-06, "loss": 0.4539, "step": 9488 }, { "epoch": 1.21, "grad_norm": 0.9071326519953101, "learning_rate": 3.571328591501033e-06, "loss": 0.5236, "step": 9489 }, { "epoch": 1.21, "grad_norm": 0.8655001204343843, "learning_rate": 3.570340043680669e-06, "loss": 0.4913, "step": 9490 }, { "epoch": 1.21, "grad_norm": 0.547057778314284, "learning_rate": 3.5693515567163604e-06, "loss": 0.4886, "step": 9491 }, { "epoch": 1.21, "grad_norm": 0.7234796173785958, "learning_rate": 3.568363130650182e-06, "loss": 0.4973, "step": 9492 }, { "epoch": 1.21, "grad_norm": 0.5463726485089478, "learning_rate": 3.567374765524213e-06, "loss": 0.4064, "step": 9493 }, { "epoch": 1.21, "grad_norm": 0.708796163515261, "learning_rate": 3.5663864613805217e-06, "loss": 0.5194, "step": 9494 }, { "epoch": 1.21, "grad_norm": 1.8742522625023077, "learning_rate": 3.565398218261178e-06, "loss": 0.5662, "step": 9495 }, { "epoch": 1.21, "grad_norm": 0.7488353337072939, "learning_rate": 3.564410036208247e-06, "loss": 0.4824, "step": 9496 }, { "epoch": 1.21, "grad_norm": 0.6227279932724672, "learning_rate": 3.563421915263794e-06, "loss": 0.407, "step": 9497 }, { "epoch": 1.21, "grad_norm": 0.7032691500929044, "learning_rate": 3.562433855469879e-06, "loss": 0.4756, "step": 9498 }, { "epoch": 1.21, "grad_norm": 0.7684014194535747, "learning_rate": 3.561445856868561e-06, "loss": 0.5294, "step": 9499 }, { "epoch": 1.21, "grad_norm": 0.6712242771991961, "learning_rate": 3.5604579195018962e-06, "loss": 0.4962, "step": 9500 }, { "epoch": 1.21, "grad_norm": 0.7469355859788023, "learning_rate": 3.5594700434119378e-06, "loss": 0.4495, "step": 9501 }, { "epoch": 1.21, "grad_norm": 0.6225801155766425, "learning_rate": 3.558482228640736e-06, "loss": 0.4564, "step": 9502 }, { "epoch": 1.21, "grad_norm": 0.692915274564329, "learning_rate": 3.5574944752303394e-06, "loss": 0.4773, "step": 9503 }, { "epoch": 1.21, "grad_norm": 0.7203387464232403, "learning_rate": 3.5565067832227933e-06, "loss": 0.5009, "step": 9504 }, { "epoch": 1.21, "grad_norm": 0.6163578035778042, "learning_rate": 3.5555191526601395e-06, "loss": 0.5272, "step": 9505 }, { "epoch": 1.21, "grad_norm": 0.7545949123118146, "learning_rate": 3.554531583584421e-06, "loss": 0.5605, "step": 9506 }, { "epoch": 1.21, "grad_norm": 0.7096062047494669, "learning_rate": 3.5535440760376736e-06, "loss": 0.5372, "step": 9507 }, { "epoch": 1.21, "grad_norm": 0.7589371207711612, "learning_rate": 3.5525566300619318e-06, "loss": 0.5539, "step": 9508 }, { "epoch": 1.21, "grad_norm": 0.740793068235596, "learning_rate": 3.55156924569923e-06, "loss": 0.4934, "step": 9509 }, { "epoch": 1.21, "grad_norm": 0.7266898947552103, "learning_rate": 3.5505819229915963e-06, "loss": 0.5067, "step": 9510 }, { "epoch": 1.21, "grad_norm": 0.676615527553606, "learning_rate": 3.549594661981059e-06, "loss": 0.4779, "step": 9511 }, { "epoch": 1.21, "grad_norm": 0.7122453572063476, "learning_rate": 3.5486074627096425e-06, "loss": 0.4317, "step": 9512 }, { "epoch": 1.21, "grad_norm": 0.6194319823175132, "learning_rate": 3.5476203252193676e-06, "loss": 0.5079, "step": 9513 }, { "epoch": 1.21, "grad_norm": 0.59752804298602, "learning_rate": 3.5466332495522543e-06, "loss": 0.4442, "step": 9514 }, { "epoch": 1.21, "grad_norm": 0.6544476110689336, "learning_rate": 3.5456462357503197e-06, "loss": 0.4124, "step": 9515 }, { "epoch": 1.21, "grad_norm": 0.6783805184502651, "learning_rate": 3.5446592838555773e-06, "loss": 0.4609, "step": 9516 }, { "epoch": 1.21, "grad_norm": 0.5867172345646141, "learning_rate": 3.543672393910037e-06, "loss": 0.4424, "step": 9517 }, { "epoch": 1.21, "grad_norm": 0.6976572604775314, "learning_rate": 3.5426855659557107e-06, "loss": 0.5671, "step": 9518 }, { "epoch": 1.21, "grad_norm": 0.7744659246456838, "learning_rate": 3.541698800034603e-06, "loss": 0.5592, "step": 9519 }, { "epoch": 1.21, "grad_norm": 0.7010569671479242, "learning_rate": 3.540712096188717e-06, "loss": 0.5154, "step": 9520 }, { "epoch": 1.21, "grad_norm": 0.6287833384926903, "learning_rate": 3.5397254544600545e-06, "loss": 0.4634, "step": 9521 }, { "epoch": 1.21, "grad_norm": 0.6728368009414035, "learning_rate": 3.5387388748906122e-06, "loss": 0.5306, "step": 9522 }, { "epoch": 1.21, "grad_norm": 0.9248970475385573, "learning_rate": 3.537752357522387e-06, "loss": 0.5508, "step": 9523 }, { "epoch": 1.21, "grad_norm": 0.8055062852865371, "learning_rate": 3.536765902397371e-06, "loss": 0.557, "step": 9524 }, { "epoch": 1.21, "grad_norm": 0.6662121900971837, "learning_rate": 3.535779509557555e-06, "loss": 0.4929, "step": 9525 }, { "epoch": 1.21, "grad_norm": 0.6850244133045399, "learning_rate": 3.5347931790449257e-06, "loss": 0.4481, "step": 9526 }, { "epoch": 1.21, "grad_norm": 0.7584437978991218, "learning_rate": 3.5338069109014694e-06, "loss": 0.5271, "step": 9527 }, { "epoch": 1.21, "grad_norm": 0.8100403308718558, "learning_rate": 3.5328207051691672e-06, "loss": 0.5173, "step": 9528 }, { "epoch": 1.21, "grad_norm": 0.8487650941987739, "learning_rate": 3.5318345618899992e-06, "loss": 0.5182, "step": 9529 }, { "epoch": 1.21, "grad_norm": 0.6133629067743392, "learning_rate": 3.5308484811059402e-06, "loss": 0.4626, "step": 9530 }, { "epoch": 1.21, "grad_norm": 0.942036253091359, "learning_rate": 3.5298624628589683e-06, "loss": 0.5166, "step": 9531 }, { "epoch": 1.21, "grad_norm": 0.6445150671411881, "learning_rate": 3.528876507191055e-06, "loss": 0.4719, "step": 9532 }, { "epoch": 1.21, "grad_norm": 0.5909108258872108, "learning_rate": 3.527890614144166e-06, "loss": 0.4, "step": 9533 }, { "epoch": 1.21, "grad_norm": 0.5798541954088046, "learning_rate": 3.52690478376027e-06, "loss": 0.4077, "step": 9534 }, { "epoch": 1.21, "grad_norm": 0.6778919284185424, "learning_rate": 3.52591901608133e-06, "loss": 0.502, "step": 9535 }, { "epoch": 1.21, "grad_norm": 0.820877798191278, "learning_rate": 3.5249333111493066e-06, "loss": 0.5297, "step": 9536 }, { "epoch": 1.21, "grad_norm": 0.697150675883807, "learning_rate": 3.523947669006159e-06, "loss": 0.4982, "step": 9537 }, { "epoch": 1.22, "grad_norm": 0.6344091185942939, "learning_rate": 3.522962089693843e-06, "loss": 0.4921, "step": 9538 }, { "epoch": 1.22, "grad_norm": 1.3954342319233473, "learning_rate": 3.5219765732543097e-06, "loss": 0.5411, "step": 9539 }, { "epoch": 1.22, "grad_norm": 0.7648751669554349, "learning_rate": 3.520991119729511e-06, "loss": 0.5726, "step": 9540 }, { "epoch": 1.22, "grad_norm": 0.7401215041280158, "learning_rate": 3.520005729161394e-06, "loss": 0.5643, "step": 9541 }, { "epoch": 1.22, "grad_norm": 0.6102587041592459, "learning_rate": 3.5190204015919023e-06, "loss": 0.4681, "step": 9542 }, { "epoch": 1.22, "grad_norm": 0.6515046034696517, "learning_rate": 3.518035137062981e-06, "loss": 0.4295, "step": 9543 }, { "epoch": 1.22, "grad_norm": 0.6335726288547892, "learning_rate": 3.5170499356165688e-06, "loss": 0.4862, "step": 9544 }, { "epoch": 1.22, "grad_norm": 0.931279516981549, "learning_rate": 3.5160647972946016e-06, "loss": 0.4985, "step": 9545 }, { "epoch": 1.22, "grad_norm": 0.6541495643489148, "learning_rate": 3.5150797221390137e-06, "loss": 0.4314, "step": 9546 }, { "epoch": 1.22, "grad_norm": 0.6574708678092356, "learning_rate": 3.514094710191738e-06, "loss": 0.4937, "step": 9547 }, { "epoch": 1.22, "grad_norm": 0.7812278288880611, "learning_rate": 3.5131097614947007e-06, "loss": 0.4697, "step": 9548 }, { "epoch": 1.22, "grad_norm": 0.6514992822042747, "learning_rate": 3.51212487608983e-06, "loss": 0.4267, "step": 9549 }, { "epoch": 1.22, "grad_norm": 0.6239247188201337, "learning_rate": 3.5111400540190493e-06, "loss": 0.4916, "step": 9550 }, { "epoch": 1.22, "grad_norm": 0.87093789534363, "learning_rate": 3.5101552953242785e-06, "loss": 0.5558, "step": 9551 }, { "epoch": 1.22, "grad_norm": 0.7011620251504134, "learning_rate": 3.509170600047436e-06, "loss": 0.5146, "step": 9552 }, { "epoch": 1.22, "grad_norm": 0.7252922378649093, "learning_rate": 3.5081859682304366e-06, "loss": 0.5136, "step": 9553 }, { "epoch": 1.22, "grad_norm": 0.6490925619006583, "learning_rate": 3.5072013999151917e-06, "loss": 0.4826, "step": 9554 }, { "epoch": 1.22, "grad_norm": 0.6454691033825337, "learning_rate": 3.506216895143615e-06, "loss": 0.4714, "step": 9555 }, { "epoch": 1.22, "grad_norm": 0.6220440688297076, "learning_rate": 3.505232453957611e-06, "loss": 0.4995, "step": 9556 }, { "epoch": 1.22, "grad_norm": 0.793677132118531, "learning_rate": 3.5042480763990857e-06, "loss": 0.5655, "step": 9557 }, { "epoch": 1.22, "grad_norm": 0.8110021035452885, "learning_rate": 3.5032637625099397e-06, "loss": 0.5614, "step": 9558 }, { "epoch": 1.22, "grad_norm": 0.8370877753247526, "learning_rate": 3.5022795123320718e-06, "loss": 0.5392, "step": 9559 }, { "epoch": 1.22, "grad_norm": 1.7992332193530127, "learning_rate": 3.50129532590738e-06, "loss": 0.5056, "step": 9560 }, { "epoch": 1.22, "grad_norm": 0.6268060304286129, "learning_rate": 3.5003112032777563e-06, "loss": 0.4374, "step": 9561 }, { "epoch": 1.22, "grad_norm": 0.7427240184302516, "learning_rate": 3.499327144485093e-06, "loss": 0.5067, "step": 9562 }, { "epoch": 1.22, "grad_norm": 0.6031566811450092, "learning_rate": 3.4983431495712773e-06, "loss": 0.4219, "step": 9563 }, { "epoch": 1.22, "grad_norm": 0.692715235767375, "learning_rate": 3.4973592185781955e-06, "loss": 0.4432, "step": 9564 }, { "epoch": 1.22, "grad_norm": 0.6455174718618477, "learning_rate": 3.4963753515477294e-06, "loss": 0.4991, "step": 9565 }, { "epoch": 1.22, "grad_norm": 0.6618973069986885, "learning_rate": 3.4953915485217603e-06, "loss": 0.4734, "step": 9566 }, { "epoch": 1.22, "grad_norm": 0.6091413365808238, "learning_rate": 3.4944078095421636e-06, "loss": 0.4451, "step": 9567 }, { "epoch": 1.22, "grad_norm": 0.6361375464784068, "learning_rate": 3.4934241346508165e-06, "loss": 0.4555, "step": 9568 }, { "epoch": 1.22, "grad_norm": 0.5552947658644004, "learning_rate": 3.492440523889589e-06, "loss": 0.437, "step": 9569 }, { "epoch": 1.22, "grad_norm": 0.638820309850292, "learning_rate": 3.4914569773003503e-06, "loss": 0.4663, "step": 9570 }, { "epoch": 1.22, "grad_norm": 0.6409077448931367, "learning_rate": 3.4904734949249685e-06, "loss": 0.4226, "step": 9571 }, { "epoch": 1.22, "grad_norm": 0.6524659815371653, "learning_rate": 3.489490076805306e-06, "loss": 0.4889, "step": 9572 }, { "epoch": 1.22, "grad_norm": 0.8104513560359653, "learning_rate": 3.4885067229832246e-06, "loss": 0.5174, "step": 9573 }, { "epoch": 1.22, "grad_norm": 0.573260450430759, "learning_rate": 3.487523433500582e-06, "loss": 0.4272, "step": 9574 }, { "epoch": 1.22, "grad_norm": 0.5968621977917752, "learning_rate": 3.4865402083992335e-06, "loss": 0.4538, "step": 9575 }, { "epoch": 1.22, "grad_norm": 0.753852619431407, "learning_rate": 3.485557047721032e-06, "loss": 0.3875, "step": 9576 }, { "epoch": 1.22, "grad_norm": 0.6648529597676575, "learning_rate": 3.484573951507827e-06, "loss": 0.4558, "step": 9577 }, { "epoch": 1.22, "grad_norm": 0.6038583918411186, "learning_rate": 3.4835909198014674e-06, "loss": 0.4875, "step": 9578 }, { "epoch": 1.22, "grad_norm": 0.6562597198557258, "learning_rate": 3.482607952643794e-06, "loss": 0.4694, "step": 9579 }, { "epoch": 1.22, "grad_norm": 0.637734725565027, "learning_rate": 3.4816250500766534e-06, "loss": 0.4651, "step": 9580 }, { "epoch": 1.22, "grad_norm": 0.6243187144183077, "learning_rate": 3.480642212141882e-06, "loss": 0.4524, "step": 9581 }, { "epoch": 1.22, "grad_norm": 0.6205104751826146, "learning_rate": 3.4796594388813165e-06, "loss": 0.4633, "step": 9582 }, { "epoch": 1.22, "grad_norm": 0.7537233492004364, "learning_rate": 3.4786767303367904e-06, "loss": 0.5545, "step": 9583 }, { "epoch": 1.22, "grad_norm": 0.8048135126357497, "learning_rate": 3.477694086550134e-06, "loss": 0.5571, "step": 9584 }, { "epoch": 1.22, "grad_norm": 0.7253749724185615, "learning_rate": 3.476711507563176e-06, "loss": 0.5059, "step": 9585 }, { "epoch": 1.22, "grad_norm": 0.836737129944647, "learning_rate": 3.475728993417742e-06, "loss": 0.5169, "step": 9586 }, { "epoch": 1.22, "grad_norm": 0.7551192186028224, "learning_rate": 3.474746544155653e-06, "loss": 0.4917, "step": 9587 }, { "epoch": 1.22, "grad_norm": 0.7168967448397087, "learning_rate": 3.47376415981873e-06, "loss": 0.445, "step": 9588 }, { "epoch": 1.22, "grad_norm": 0.5533768624867513, "learning_rate": 3.47278184044879e-06, "loss": 0.4699, "step": 9589 }, { "epoch": 1.22, "grad_norm": 0.6531325892846266, "learning_rate": 3.4717995860876462e-06, "loss": 0.4578, "step": 9590 }, { "epoch": 1.22, "grad_norm": 0.7436235037554774, "learning_rate": 3.4708173967771107e-06, "loss": 0.488, "step": 9591 }, { "epoch": 1.22, "grad_norm": 0.8112057817512172, "learning_rate": 3.46983527255899e-06, "loss": 0.5133, "step": 9592 }, { "epoch": 1.22, "grad_norm": 0.6072924295617089, "learning_rate": 3.468853213475094e-06, "loss": 0.4526, "step": 9593 }, { "epoch": 1.22, "grad_norm": 0.6088393253612352, "learning_rate": 3.467871219567224e-06, "loss": 0.4677, "step": 9594 }, { "epoch": 1.22, "grad_norm": 0.6354448602537883, "learning_rate": 3.4668892908771802e-06, "loss": 0.4607, "step": 9595 }, { "epoch": 1.22, "grad_norm": 0.588668623792249, "learning_rate": 3.4659074274467596e-06, "loss": 0.4394, "step": 9596 }, { "epoch": 1.22, "grad_norm": 0.6483942601325419, "learning_rate": 3.464925629317758e-06, "loss": 0.4328, "step": 9597 }, { "epoch": 1.22, "grad_norm": 0.6266912586228107, "learning_rate": 3.463943896531967e-06, "loss": 0.4791, "step": 9598 }, { "epoch": 1.22, "grad_norm": 0.6582908632337373, "learning_rate": 3.462962229131176e-06, "loss": 0.4797, "step": 9599 }, { "epoch": 1.22, "grad_norm": 0.8104118433322373, "learning_rate": 3.461980627157171e-06, "loss": 0.4799, "step": 9600 }, { "epoch": 1.22, "grad_norm": 0.6214936350332427, "learning_rate": 3.4609990906517367e-06, "loss": 0.47, "step": 9601 }, { "epoch": 1.22, "grad_norm": 0.8602054275217241, "learning_rate": 3.460017619656652e-06, "loss": 0.5482, "step": 9602 }, { "epoch": 1.22, "grad_norm": 0.8458931340294048, "learning_rate": 3.459036214213697e-06, "loss": 0.5587, "step": 9603 }, { "epoch": 1.22, "grad_norm": 0.7366719521818139, "learning_rate": 3.458054874364643e-06, "loss": 0.531, "step": 9604 }, { "epoch": 1.22, "grad_norm": 0.7087381750490165, "learning_rate": 3.4570736001512685e-06, "loss": 0.5459, "step": 9605 }, { "epoch": 1.22, "grad_norm": 0.7783415464754193, "learning_rate": 3.45609239161534e-06, "loss": 0.4991, "step": 9606 }, { "epoch": 1.22, "grad_norm": 0.6350011996685322, "learning_rate": 3.455111248798625e-06, "loss": 0.481, "step": 9607 }, { "epoch": 1.22, "grad_norm": 0.7195693241568325, "learning_rate": 3.4541301717428877e-06, "loss": 0.4741, "step": 9608 }, { "epoch": 1.22, "grad_norm": 0.7088978698882897, "learning_rate": 3.4531491604898888e-06, "loss": 0.464, "step": 9609 }, { "epoch": 1.22, "grad_norm": 0.721564441042376, "learning_rate": 3.452168215081387e-06, "loss": 0.5201, "step": 9610 }, { "epoch": 1.22, "grad_norm": 0.7811436353412271, "learning_rate": 3.451187335559138e-06, "loss": 0.5216, "step": 9611 }, { "epoch": 1.22, "grad_norm": 0.6132511808358145, "learning_rate": 3.450206521964895e-06, "loss": 0.4325, "step": 9612 }, { "epoch": 1.22, "grad_norm": 0.6925964234648885, "learning_rate": 3.4492257743404078e-06, "loss": 0.4474, "step": 9613 }, { "epoch": 1.22, "grad_norm": 0.6562312598877206, "learning_rate": 3.4482450927274242e-06, "loss": 0.4717, "step": 9614 }, { "epoch": 1.22, "grad_norm": 1.0228066324862592, "learning_rate": 3.4472644771676878e-06, "loss": 0.5333, "step": 9615 }, { "epoch": 1.23, "grad_norm": 0.5807906563440279, "learning_rate": 3.4462839277029393e-06, "loss": 0.4398, "step": 9616 }, { "epoch": 1.23, "grad_norm": 0.6340389532708894, "learning_rate": 3.445303444374921e-06, "loss": 0.4507, "step": 9617 }, { "epoch": 1.23, "grad_norm": 0.7427823046939891, "learning_rate": 3.4443230272253662e-06, "loss": 0.5257, "step": 9618 }, { "epoch": 1.23, "grad_norm": 0.6269877363995071, "learning_rate": 3.44334267629601e-06, "loss": 0.477, "step": 9619 }, { "epoch": 1.23, "grad_norm": 0.743625191470743, "learning_rate": 3.442362391628581e-06, "loss": 0.5226, "step": 9620 }, { "epoch": 1.23, "grad_norm": 0.6038063353448906, "learning_rate": 3.4413821732648077e-06, "loss": 0.4348, "step": 9621 }, { "epoch": 1.23, "grad_norm": 0.7309311624728724, "learning_rate": 3.4404020212464147e-06, "loss": 0.4612, "step": 9622 }, { "epoch": 1.23, "grad_norm": 0.6570554203276916, "learning_rate": 3.439421935615124e-06, "loss": 0.4936, "step": 9623 }, { "epoch": 1.23, "grad_norm": 0.7456937621873247, "learning_rate": 3.4384419164126537e-06, "loss": 0.5251, "step": 9624 }, { "epoch": 1.23, "grad_norm": 0.6025230415683878, "learning_rate": 3.4374619636807217e-06, "loss": 0.4733, "step": 9625 }, { "epoch": 1.23, "grad_norm": 0.7239543169751721, "learning_rate": 3.4364820774610406e-06, "loss": 0.5405, "step": 9626 }, { "epoch": 1.23, "grad_norm": 0.6155250039409523, "learning_rate": 3.4355022577953216e-06, "loss": 0.4676, "step": 9627 }, { "epoch": 1.23, "grad_norm": 1.212115193194442, "learning_rate": 3.4345225047252718e-06, "loss": 0.515, "step": 9628 }, { "epoch": 1.23, "grad_norm": 0.5883823266749431, "learning_rate": 3.4335428182925956e-06, "loss": 0.4876, "step": 9629 }, { "epoch": 1.23, "grad_norm": 0.6486140846555637, "learning_rate": 3.4325631985389964e-06, "loss": 0.4339, "step": 9630 }, { "epoch": 1.23, "grad_norm": 0.6670372602311031, "learning_rate": 3.4315836455061735e-06, "loss": 0.4921, "step": 9631 }, { "epoch": 1.23, "grad_norm": 0.6278492413799993, "learning_rate": 3.4306041592358218e-06, "loss": 0.5041, "step": 9632 }, { "epoch": 1.23, "grad_norm": 0.5915076063710176, "learning_rate": 3.429624739769637e-06, "loss": 0.465, "step": 9633 }, { "epoch": 1.23, "grad_norm": 0.7136725725353825, "learning_rate": 3.4286453871493087e-06, "loss": 0.5453, "step": 9634 }, { "epoch": 1.23, "grad_norm": 1.3624164864499388, "learning_rate": 3.4276661014165246e-06, "loss": 0.5368, "step": 9635 }, { "epoch": 1.23, "grad_norm": 0.747894990043783, "learning_rate": 3.426686882612971e-06, "loss": 0.5623, "step": 9636 }, { "epoch": 1.23, "grad_norm": 0.7980325720582407, "learning_rate": 3.4257077307803286e-06, "loss": 0.562, "step": 9637 }, { "epoch": 1.23, "grad_norm": 0.712089915023937, "learning_rate": 3.4247286459602775e-06, "loss": 0.4962, "step": 9638 }, { "epoch": 1.23, "grad_norm": 0.5620463504382522, "learning_rate": 3.4237496281944936e-06, "loss": 0.3795, "step": 9639 }, { "epoch": 1.23, "grad_norm": 0.6172752564627064, "learning_rate": 3.4227706775246517e-06, "loss": 0.4245, "step": 9640 }, { "epoch": 1.23, "grad_norm": 0.609990838409253, "learning_rate": 3.4217917939924204e-06, "loss": 0.5065, "step": 9641 }, { "epoch": 1.23, "grad_norm": 0.8053919550094233, "learning_rate": 3.4208129776394712e-06, "loss": 0.5541, "step": 9642 }, { "epoch": 1.23, "grad_norm": 1.120625348506504, "learning_rate": 3.4198342285074667e-06, "loss": 0.5251, "step": 9643 }, { "epoch": 1.23, "grad_norm": 0.9142736406870288, "learning_rate": 3.418855546638069e-06, "loss": 0.4557, "step": 9644 }, { "epoch": 1.23, "grad_norm": 0.5845516104725537, "learning_rate": 3.417876932072939e-06, "loss": 0.4323, "step": 9645 }, { "epoch": 1.23, "grad_norm": 0.7049217338081998, "learning_rate": 3.4168983848537316e-06, "loss": 0.4971, "step": 9646 }, { "epoch": 1.23, "grad_norm": 0.7026243089837511, "learning_rate": 3.4159199050221015e-06, "loss": 0.4888, "step": 9647 }, { "epoch": 1.23, "grad_norm": 0.6423647309746382, "learning_rate": 3.414941492619699e-06, "loss": 0.4698, "step": 9648 }, { "epoch": 1.23, "grad_norm": 0.6964945513856139, "learning_rate": 3.413963147688172e-06, "loss": 0.5212, "step": 9649 }, { "epoch": 1.23, "grad_norm": 0.6214252614056979, "learning_rate": 3.4129848702691648e-06, "loss": 0.4758, "step": 9650 }, { "epoch": 1.23, "grad_norm": 0.7473654028416774, "learning_rate": 3.412006660404321e-06, "loss": 0.4981, "step": 9651 }, { "epoch": 1.23, "grad_norm": 0.7338716613941693, "learning_rate": 3.4110285181352787e-06, "loss": 0.4483, "step": 9652 }, { "epoch": 1.23, "grad_norm": 0.5979132549143013, "learning_rate": 3.410050443503675e-06, "loss": 0.4381, "step": 9653 }, { "epoch": 1.23, "grad_norm": 0.6845570601229272, "learning_rate": 3.409072436551142e-06, "loss": 0.4958, "step": 9654 }, { "epoch": 1.23, "grad_norm": 0.5776470081109305, "learning_rate": 3.4080944973193127e-06, "loss": 0.4978, "step": 9655 }, { "epoch": 1.23, "grad_norm": 0.6818693753463683, "learning_rate": 3.4071166258498134e-06, "loss": 0.5582, "step": 9656 }, { "epoch": 1.23, "grad_norm": 0.7405322976684284, "learning_rate": 3.4061388221842694e-06, "loss": 0.5394, "step": 9657 }, { "epoch": 1.23, "grad_norm": 0.7630809837510244, "learning_rate": 3.4051610863643026e-06, "loss": 0.5184, "step": 9658 }, { "epoch": 1.23, "grad_norm": 0.6240005113126648, "learning_rate": 3.4041834184315325e-06, "loss": 0.5185, "step": 9659 }, { "epoch": 1.23, "grad_norm": 0.738748319660092, "learning_rate": 3.403205818427574e-06, "loss": 0.5198, "step": 9660 }, { "epoch": 1.23, "grad_norm": 0.85820358644243, "learning_rate": 3.4022282863940415e-06, "loss": 0.5498, "step": 9661 }, { "epoch": 1.23, "grad_norm": 0.7723464552274426, "learning_rate": 3.401250822372545e-06, "loss": 0.5401, "step": 9662 }, { "epoch": 1.23, "grad_norm": 0.9412500161288903, "learning_rate": 3.4002734264046923e-06, "loss": 0.4923, "step": 9663 }, { "epoch": 1.23, "grad_norm": 0.6688416186565718, "learning_rate": 3.3992960985320877e-06, "loss": 0.4637, "step": 9664 }, { "epoch": 1.23, "grad_norm": 0.6901652374288079, "learning_rate": 3.3983188387963334e-06, "loss": 0.4753, "step": 9665 }, { "epoch": 1.23, "grad_norm": 0.6530480862320649, "learning_rate": 3.397341647239025e-06, "loss": 0.5008, "step": 9666 }, { "epoch": 1.23, "grad_norm": 0.7884568324014779, "learning_rate": 3.3963645239017644e-06, "loss": 0.5431, "step": 9667 }, { "epoch": 1.23, "grad_norm": 0.8573859812149741, "learning_rate": 3.3953874688261413e-06, "loss": 0.49, "step": 9668 }, { "epoch": 1.23, "grad_norm": 1.2110211371568325, "learning_rate": 3.3944104820537453e-06, "loss": 0.5374, "step": 9669 }, { "epoch": 1.23, "grad_norm": 0.6887016607639445, "learning_rate": 3.393433563626165e-06, "loss": 0.4968, "step": 9670 }, { "epoch": 1.23, "grad_norm": 0.754896727447843, "learning_rate": 3.392456713584984e-06, "loss": 0.5469, "step": 9671 }, { "epoch": 1.23, "grad_norm": 0.7532981159357144, "learning_rate": 3.3914799319717837e-06, "loss": 0.5089, "step": 9672 }, { "epoch": 1.23, "grad_norm": 0.7393912423406087, "learning_rate": 3.390503218828143e-06, "loss": 0.5416, "step": 9673 }, { "epoch": 1.23, "grad_norm": 0.8085606629083892, "learning_rate": 3.389526574195636e-06, "loss": 0.5285, "step": 9674 }, { "epoch": 1.23, "grad_norm": 0.6091851025976537, "learning_rate": 3.388549998115837e-06, "loss": 0.4232, "step": 9675 }, { "epoch": 1.23, "grad_norm": 0.5707818476742368, "learning_rate": 3.3875734906303146e-06, "loss": 0.4511, "step": 9676 }, { "epoch": 1.23, "grad_norm": 0.7227538980858315, "learning_rate": 3.386597051780637e-06, "loss": 0.4839, "step": 9677 }, { "epoch": 1.23, "grad_norm": 0.6104563736198384, "learning_rate": 3.3856206816083647e-06, "loss": 0.4512, "step": 9678 }, { "epoch": 1.23, "grad_norm": 0.7664101677574381, "learning_rate": 3.384644380155063e-06, "loss": 0.4882, "step": 9679 }, { "epoch": 1.23, "grad_norm": 0.7209889836951127, "learning_rate": 3.383668147462289e-06, "loss": 0.5079, "step": 9680 }, { "epoch": 1.23, "grad_norm": 0.668217285319877, "learning_rate": 3.3826919835715965e-06, "loss": 0.4372, "step": 9681 }, { "epoch": 1.23, "grad_norm": 0.6373655781152588, "learning_rate": 3.3817158885245376e-06, "loss": 0.5166, "step": 9682 }, { "epoch": 1.23, "grad_norm": 0.9074841743386916, "learning_rate": 3.380739862362662e-06, "loss": 0.5054, "step": 9683 }, { "epoch": 1.23, "grad_norm": 0.7556160265804783, "learning_rate": 3.3797639051275166e-06, "loss": 0.5402, "step": 9684 }, { "epoch": 1.23, "grad_norm": 0.7500841576864383, "learning_rate": 3.3787880168606442e-06, "loss": 0.4938, "step": 9685 }, { "epoch": 1.23, "grad_norm": 0.6651220283663039, "learning_rate": 3.377812197603585e-06, "loss": 0.4682, "step": 9686 }, { "epoch": 1.23, "grad_norm": 0.6644771417163498, "learning_rate": 3.3768364473978766e-06, "loss": 0.4449, "step": 9687 }, { "epoch": 1.23, "grad_norm": 0.5885818079786101, "learning_rate": 3.375860766285054e-06, "loss": 0.438, "step": 9688 }, { "epoch": 1.23, "grad_norm": 0.6493892725602527, "learning_rate": 3.3748851543066486e-06, "loss": 0.4498, "step": 9689 }, { "epoch": 1.23, "grad_norm": 0.5903591044995382, "learning_rate": 3.373909611504189e-06, "loss": 0.4344, "step": 9690 }, { "epoch": 1.23, "grad_norm": 0.8475285021136163, "learning_rate": 3.3729341379192006e-06, "loss": 0.4975, "step": 9691 }, { "epoch": 1.23, "grad_norm": 0.6043653900253609, "learning_rate": 3.371958733593207e-06, "loss": 0.4319, "step": 9692 }, { "epoch": 1.23, "grad_norm": 0.5772567621284745, "learning_rate": 3.370983398567727e-06, "loss": 0.4342, "step": 9693 }, { "epoch": 1.23, "grad_norm": 0.6100995651157373, "learning_rate": 3.370008132884278e-06, "loss": 0.4757, "step": 9694 }, { "epoch": 1.24, "grad_norm": 0.5980289280900019, "learning_rate": 3.3690329365843743e-06, "loss": 0.4743, "step": 9695 }, { "epoch": 1.24, "grad_norm": 0.8002261870219475, "learning_rate": 3.3680578097095275e-06, "loss": 0.4867, "step": 9696 }, { "epoch": 1.24, "grad_norm": 0.7843917144754352, "learning_rate": 3.3670827523012443e-06, "loss": 0.4666, "step": 9697 }, { "epoch": 1.24, "grad_norm": 0.8952009648199384, "learning_rate": 3.3661077644010308e-06, "loss": 0.5205, "step": 9698 }, { "epoch": 1.24, "grad_norm": 1.006639819442437, "learning_rate": 3.3651328460503884e-06, "loss": 0.5611, "step": 9699 }, { "epoch": 1.24, "grad_norm": 0.8166682729650561, "learning_rate": 3.364157997290816e-06, "loss": 0.5293, "step": 9700 }, { "epoch": 1.24, "grad_norm": 0.6763966588639446, "learning_rate": 3.363183218163811e-06, "loss": 0.5146, "step": 9701 }, { "epoch": 1.24, "grad_norm": 0.7478200352706385, "learning_rate": 3.3622085087108653e-06, "loss": 0.5068, "step": 9702 }, { "epoch": 1.24, "grad_norm": 0.674491227335443, "learning_rate": 3.3612338689734693e-06, "loss": 0.444, "step": 9703 }, { "epoch": 1.24, "grad_norm": 0.618147605645662, "learning_rate": 3.360259298993111e-06, "loss": 0.4791, "step": 9704 }, { "epoch": 1.24, "grad_norm": 0.8377174390100891, "learning_rate": 3.359284798811275e-06, "loss": 0.5463, "step": 9705 }, { "epoch": 1.24, "grad_norm": 0.5628948708384927, "learning_rate": 3.3583103684694418e-06, "loss": 0.4061, "step": 9706 }, { "epoch": 1.24, "grad_norm": 0.654613670938272, "learning_rate": 3.35733600800909e-06, "loss": 0.3981, "step": 9707 }, { "epoch": 1.24, "grad_norm": 0.6085584585489507, "learning_rate": 3.3563617174716954e-06, "loss": 0.4479, "step": 9708 }, { "epoch": 1.24, "grad_norm": 0.6910659131927186, "learning_rate": 3.3553874968987304e-06, "loss": 0.4891, "step": 9709 }, { "epoch": 1.24, "grad_norm": 0.7340759927453796, "learning_rate": 3.354413346331664e-06, "loss": 0.5156, "step": 9710 }, { "epoch": 1.24, "grad_norm": 0.7055322321284114, "learning_rate": 3.353439265811963e-06, "loss": 0.5611, "step": 9711 }, { "epoch": 1.24, "grad_norm": 0.7413780243438631, "learning_rate": 3.352465255381091e-06, "loss": 0.5525, "step": 9712 }, { "epoch": 1.24, "grad_norm": 0.7415240845122817, "learning_rate": 3.351491315080508e-06, "loss": 0.5088, "step": 9713 }, { "epoch": 1.24, "grad_norm": 0.6062353174867325, "learning_rate": 3.350517444951672e-06, "loss": 0.4666, "step": 9714 }, { "epoch": 1.24, "grad_norm": 0.5959660815935528, "learning_rate": 3.3495436450360375e-06, "loss": 0.5561, "step": 9715 }, { "epoch": 1.24, "grad_norm": 0.795469707114058, "learning_rate": 3.348569915375053e-06, "loss": 0.6284, "step": 9716 }, { "epoch": 1.24, "grad_norm": 1.122140338181277, "learning_rate": 3.3475962560101728e-06, "loss": 0.5122, "step": 9717 }, { "epoch": 1.24, "grad_norm": 0.7428464969622799, "learning_rate": 3.3466226669828396e-06, "loss": 0.5342, "step": 9718 }, { "epoch": 1.24, "grad_norm": 0.5913019605528251, "learning_rate": 3.345649148334496e-06, "loss": 0.4627, "step": 9719 }, { "epoch": 1.24, "grad_norm": 0.7312336653461774, "learning_rate": 3.3446757001065816e-06, "loss": 0.5261, "step": 9720 }, { "epoch": 1.24, "grad_norm": 0.7278513619582827, "learning_rate": 3.343702322340533e-06, "loss": 0.5114, "step": 9721 }, { "epoch": 1.24, "grad_norm": 0.7438463711809817, "learning_rate": 3.3427290150777836e-06, "loss": 0.548, "step": 9722 }, { "epoch": 1.24, "grad_norm": 0.6437042828354095, "learning_rate": 3.3417557783597643e-06, "loss": 0.4669, "step": 9723 }, { "epoch": 1.24, "grad_norm": 0.7360270876315178, "learning_rate": 3.340782612227903e-06, "loss": 0.5099, "step": 9724 }, { "epoch": 1.24, "grad_norm": 0.838277362309171, "learning_rate": 3.339809516723623e-06, "loss": 0.4796, "step": 9725 }, { "epoch": 1.24, "grad_norm": 0.5744679583378896, "learning_rate": 3.338836491888347e-06, "loss": 0.4733, "step": 9726 }, { "epoch": 1.24, "grad_norm": 0.6886928918535186, "learning_rate": 3.3378635377634927e-06, "loss": 0.5025, "step": 9727 }, { "epoch": 1.24, "grad_norm": 0.8364803837127899, "learning_rate": 3.336890654390475e-06, "loss": 0.4694, "step": 9728 }, { "epoch": 1.24, "grad_norm": 0.6527493447813238, "learning_rate": 3.335917841810709e-06, "loss": 0.435, "step": 9729 }, { "epoch": 1.24, "grad_norm": 0.6360823743572565, "learning_rate": 3.334945100065603e-06, "loss": 0.4681, "step": 9730 }, { "epoch": 1.24, "grad_norm": 0.7511257876206643, "learning_rate": 3.3339724291965637e-06, "loss": 0.5205, "step": 9731 }, { "epoch": 1.24, "grad_norm": 0.6934778456312952, "learning_rate": 3.332999829244994e-06, "loss": 0.5217, "step": 9732 }, { "epoch": 1.24, "grad_norm": 0.7043104721963191, "learning_rate": 3.3320273002522947e-06, "loss": 0.4897, "step": 9733 }, { "epoch": 1.24, "grad_norm": 0.5824895233477628, "learning_rate": 3.331054842259863e-06, "loss": 0.4755, "step": 9734 }, { "epoch": 1.24, "grad_norm": 0.5520456878069466, "learning_rate": 3.3300824553090934e-06, "loss": 0.4655, "step": 9735 }, { "epoch": 1.24, "grad_norm": 0.6506681488520155, "learning_rate": 3.329110139441377e-06, "loss": 0.4351, "step": 9736 }, { "epoch": 1.24, "grad_norm": 0.5785868482380239, "learning_rate": 3.3281378946981034e-06, "loss": 0.4369, "step": 9737 }, { "epoch": 1.24, "grad_norm": 0.5986856199645442, "learning_rate": 3.3271657211206566e-06, "loss": 0.4277, "step": 9738 }, { "epoch": 1.24, "grad_norm": 0.5732413644115761, "learning_rate": 3.3261936187504195e-06, "loss": 0.424, "step": 9739 }, { "epoch": 1.24, "grad_norm": 0.6397259322602841, "learning_rate": 3.3252215876287695e-06, "loss": 0.4793, "step": 9740 }, { "epoch": 1.24, "grad_norm": 0.7759409451260584, "learning_rate": 3.3242496277970872e-06, "loss": 0.4811, "step": 9741 }, { "epoch": 1.24, "grad_norm": 0.6419254316919619, "learning_rate": 3.3232777392967417e-06, "loss": 0.4914, "step": 9742 }, { "epoch": 1.24, "grad_norm": 0.6695568145934685, "learning_rate": 3.3223059221691063e-06, "loss": 0.4765, "step": 9743 }, { "epoch": 1.24, "grad_norm": 0.6953302181373358, "learning_rate": 3.3213341764555463e-06, "loss": 0.5149, "step": 9744 }, { "epoch": 1.24, "grad_norm": 1.2427653108082664, "learning_rate": 3.320362502197426e-06, "loss": 0.5687, "step": 9745 }, { "epoch": 1.24, "grad_norm": 0.6219528949720443, "learning_rate": 3.319390899436107e-06, "loss": 0.4848, "step": 9746 }, { "epoch": 1.24, "grad_norm": 0.5871854366235867, "learning_rate": 3.318419368212946e-06, "loss": 0.4381, "step": 9747 }, { "epoch": 1.24, "grad_norm": 0.6486799695735382, "learning_rate": 3.3174479085692997e-06, "loss": 0.4834, "step": 9748 }, { "epoch": 1.24, "grad_norm": 0.7546057165321423, "learning_rate": 3.3164765205465187e-06, "loss": 0.5192, "step": 9749 }, { "epoch": 1.24, "grad_norm": 1.1427536234375142, "learning_rate": 3.315505204185953e-06, "loss": 0.5416, "step": 9750 }, { "epoch": 1.24, "grad_norm": 0.717578958570655, "learning_rate": 3.314533959528948e-06, "loss": 0.522, "step": 9751 }, { "epoch": 1.24, "grad_norm": 0.7899855094305798, "learning_rate": 3.3135627866168465e-06, "loss": 0.5027, "step": 9752 }, { "epoch": 1.24, "grad_norm": 0.7555347788382839, "learning_rate": 3.312591685490986e-06, "loss": 0.5021, "step": 9753 }, { "epoch": 1.24, "grad_norm": 0.591234827869523, "learning_rate": 3.3116206561927074e-06, "loss": 0.4164, "step": 9754 }, { "epoch": 1.24, "grad_norm": 0.5936487336478009, "learning_rate": 3.3106496987633408e-06, "loss": 0.4967, "step": 9755 }, { "epoch": 1.24, "grad_norm": 0.8057824222276049, "learning_rate": 3.3096788132442186e-06, "loss": 0.5461, "step": 9756 }, { "epoch": 1.24, "grad_norm": 0.6994944841147624, "learning_rate": 3.308707999676668e-06, "loss": 0.5093, "step": 9757 }, { "epoch": 1.24, "grad_norm": 0.6242269304173834, "learning_rate": 3.307737258102014e-06, "loss": 0.4843, "step": 9758 }, { "epoch": 1.24, "grad_norm": 0.6617062699804608, "learning_rate": 3.3067665885615762e-06, "loss": 0.4927, "step": 9759 }, { "epoch": 1.24, "grad_norm": 0.692744029806587, "learning_rate": 3.3057959910966754e-06, "loss": 0.5064, "step": 9760 }, { "epoch": 1.24, "grad_norm": 0.7759133672417525, "learning_rate": 3.304825465748624e-06, "loss": 0.5223, "step": 9761 }, { "epoch": 1.24, "grad_norm": 0.7373400225765957, "learning_rate": 3.303855012558736e-06, "loss": 0.4861, "step": 9762 }, { "epoch": 1.24, "grad_norm": 0.5928736253734711, "learning_rate": 3.30288463156832e-06, "loss": 0.4668, "step": 9763 }, { "epoch": 1.24, "grad_norm": 0.6188388607574133, "learning_rate": 3.3019143228186825e-06, "loss": 0.4836, "step": 9764 }, { "epoch": 1.24, "grad_norm": 0.5747153962949537, "learning_rate": 3.300944086351124e-06, "loss": 0.4496, "step": 9765 }, { "epoch": 1.24, "grad_norm": 0.5840292329822466, "learning_rate": 3.2999739222069477e-06, "loss": 0.4171, "step": 9766 }, { "epoch": 1.24, "grad_norm": 0.6775475721623886, "learning_rate": 3.299003830427449e-06, "loss": 0.5136, "step": 9767 }, { "epoch": 1.24, "grad_norm": 0.8411364293719691, "learning_rate": 3.2980338110539225e-06, "loss": 0.495, "step": 9768 }, { "epoch": 1.24, "grad_norm": 0.6453161014766059, "learning_rate": 3.2970638641276563e-06, "loss": 0.4473, "step": 9769 }, { "epoch": 1.24, "grad_norm": 0.5595071340611592, "learning_rate": 3.296093989689941e-06, "loss": 0.4707, "step": 9770 }, { "epoch": 1.24, "grad_norm": 0.7997718759840764, "learning_rate": 3.2951241877820594e-06, "loss": 0.5121, "step": 9771 }, { "epoch": 1.24, "grad_norm": 0.5763401014143, "learning_rate": 3.2941544584452928e-06, "loss": 0.4461, "step": 9772 }, { "epoch": 1.25, "grad_norm": 0.6561222903062404, "learning_rate": 3.2931848017209202e-06, "loss": 0.4714, "step": 9773 }, { "epoch": 1.25, "grad_norm": 0.5806328696962216, "learning_rate": 3.292215217650215e-06, "loss": 0.47, "step": 9774 }, { "epoch": 1.25, "grad_norm": 0.6387932215629026, "learning_rate": 3.2912457062744526e-06, "loss": 0.4732, "step": 9775 }, { "epoch": 1.25, "grad_norm": 0.6183663877210778, "learning_rate": 3.290276267634899e-06, "loss": 0.4013, "step": 9776 }, { "epoch": 1.25, "grad_norm": 0.6911524512861753, "learning_rate": 3.289306901772822e-06, "loss": 0.4614, "step": 9777 }, { "epoch": 1.25, "grad_norm": 0.6957098179567416, "learning_rate": 3.2883376087294804e-06, "loss": 0.4842, "step": 9778 }, { "epoch": 1.25, "grad_norm": 0.8892080228449, "learning_rate": 3.28736838854614e-06, "loss": 0.5612, "step": 9779 }, { "epoch": 1.25, "grad_norm": 0.6049457145813931, "learning_rate": 3.2863992412640546e-06, "loss": 0.449, "step": 9780 }, { "epoch": 1.25, "grad_norm": 0.6271762013024282, "learning_rate": 3.285430166924477e-06, "loss": 0.4191, "step": 9781 }, { "epoch": 1.25, "grad_norm": 0.610450083761308, "learning_rate": 3.2844611655686586e-06, "loss": 0.5055, "step": 9782 }, { "epoch": 1.25, "grad_norm": 0.8269049664396605, "learning_rate": 3.2834922372378465e-06, "loss": 0.5623, "step": 9783 }, { "epoch": 1.25, "grad_norm": 0.7516688504708258, "learning_rate": 3.282523381973284e-06, "loss": 0.5023, "step": 9784 }, { "epoch": 1.25, "grad_norm": 0.617465779449233, "learning_rate": 3.2815545998162134e-06, "loss": 0.4418, "step": 9785 }, { "epoch": 1.25, "grad_norm": 0.7218667417046295, "learning_rate": 3.280585890807872e-06, "loss": 0.5132, "step": 9786 }, { "epoch": 1.25, "grad_norm": 0.7065332850665154, "learning_rate": 3.2796172549894946e-06, "loss": 0.5278, "step": 9787 }, { "epoch": 1.25, "grad_norm": 0.7764326359182376, "learning_rate": 3.278648692402313e-06, "loss": 0.4591, "step": 9788 }, { "epoch": 1.25, "grad_norm": 0.8430399155602538, "learning_rate": 3.2776802030875564e-06, "loss": 0.5827, "step": 9789 }, { "epoch": 1.25, "grad_norm": 0.858285110893183, "learning_rate": 3.2767117870864473e-06, "loss": 0.4886, "step": 9790 }, { "epoch": 1.25, "grad_norm": 0.6045502596390667, "learning_rate": 3.275743444440213e-06, "loss": 0.4728, "step": 9791 }, { "epoch": 1.25, "grad_norm": 0.6783595771771905, "learning_rate": 3.27477517519007e-06, "loss": 0.5057, "step": 9792 }, { "epoch": 1.25, "grad_norm": 2.369519675583337, "learning_rate": 3.2738069793772353e-06, "loss": 0.523, "step": 9793 }, { "epoch": 1.25, "grad_norm": 0.8034220648985635, "learning_rate": 3.272838857042921e-06, "loss": 0.503, "step": 9794 }, { "epoch": 1.25, "grad_norm": 0.5758599652496723, "learning_rate": 3.2718708082283367e-06, "loss": 0.4371, "step": 9795 }, { "epoch": 1.25, "grad_norm": 0.6405399937475216, "learning_rate": 3.2709028329746905e-06, "loss": 0.4621, "step": 9796 }, { "epoch": 1.25, "grad_norm": 0.8655253624930225, "learning_rate": 3.2699349313231854e-06, "loss": 0.5093, "step": 9797 }, { "epoch": 1.25, "grad_norm": 0.6035259467451702, "learning_rate": 3.2689671033150215e-06, "loss": 0.465, "step": 9798 }, { "epoch": 1.25, "grad_norm": 0.6813695313711089, "learning_rate": 3.267999348991397e-06, "loss": 0.4885, "step": 9799 }, { "epoch": 1.25, "grad_norm": 0.7322371401325936, "learning_rate": 3.2670316683935047e-06, "loss": 0.5128, "step": 9800 }, { "epoch": 1.25, "grad_norm": 0.5816729474457779, "learning_rate": 3.2660640615625374e-06, "loss": 0.4603, "step": 9801 }, { "epoch": 1.25, "grad_norm": 0.621793815698565, "learning_rate": 3.26509652853968e-06, "loss": 0.5114, "step": 9802 }, { "epoch": 1.25, "grad_norm": 0.6550650746031575, "learning_rate": 3.26412906936612e-06, "loss": 0.5285, "step": 9803 }, { "epoch": 1.25, "grad_norm": 0.744118183620498, "learning_rate": 3.263161684083039e-06, "loss": 0.5931, "step": 9804 }, { "epoch": 1.25, "grad_norm": 0.7480824190284789, "learning_rate": 3.2621943727316164e-06, "loss": 0.5122, "step": 9805 }, { "epoch": 1.25, "grad_norm": 0.6299208935569788, "learning_rate": 3.261227135353025e-06, "loss": 0.4177, "step": 9806 }, { "epoch": 1.25, "grad_norm": 0.6400414694783045, "learning_rate": 3.260259971988438e-06, "loss": 0.4496, "step": 9807 }, { "epoch": 1.25, "grad_norm": 0.5860832921444632, "learning_rate": 3.259292882679025e-06, "loss": 0.421, "step": 9808 }, { "epoch": 1.25, "grad_norm": 0.620047916547074, "learning_rate": 3.2583258674659513e-06, "loss": 0.4473, "step": 9809 }, { "epoch": 1.25, "grad_norm": 0.5974931480253365, "learning_rate": 3.2573589263903803e-06, "loss": 0.4503, "step": 9810 }, { "epoch": 1.25, "grad_norm": 0.6033952382299103, "learning_rate": 3.2563920594934708e-06, "loss": 0.4677, "step": 9811 }, { "epoch": 1.25, "grad_norm": 0.7231217390976792, "learning_rate": 3.255425266816379e-06, "loss": 0.4535, "step": 9812 }, { "epoch": 1.25, "grad_norm": 0.7748083867869024, "learning_rate": 3.254458548400259e-06, "loss": 0.5228, "step": 9813 }, { "epoch": 1.25, "grad_norm": 0.6604880883838006, "learning_rate": 3.2534919042862613e-06, "loss": 0.4374, "step": 9814 }, { "epoch": 1.25, "grad_norm": 0.7983307918453107, "learning_rate": 3.2525253345155305e-06, "loss": 0.5569, "step": 9815 }, { "epoch": 1.25, "grad_norm": 0.7923631059098096, "learning_rate": 3.251558839129213e-06, "loss": 0.5344, "step": 9816 }, { "epoch": 1.25, "grad_norm": 0.6550914907641446, "learning_rate": 3.250592418168448e-06, "loss": 0.434, "step": 9817 }, { "epoch": 1.25, "grad_norm": 0.576047407488201, "learning_rate": 3.249626071674374e-06, "loss": 0.4108, "step": 9818 }, { "epoch": 1.25, "grad_norm": 0.6019748511060291, "learning_rate": 3.2486597996881252e-06, "loss": 0.479, "step": 9819 }, { "epoch": 1.25, "grad_norm": 0.8879563645186039, "learning_rate": 3.247693602250832e-06, "loss": 0.5408, "step": 9820 }, { "epoch": 1.25, "grad_norm": 0.5803954889613333, "learning_rate": 3.246727479403623e-06, "loss": 0.4688, "step": 9821 }, { "epoch": 1.25, "grad_norm": 0.7346962074704275, "learning_rate": 3.245761431187622e-06, "loss": 0.5561, "step": 9822 }, { "epoch": 1.25, "grad_norm": 2.6534656074696925, "learning_rate": 3.244795457643951e-06, "loss": 0.5511, "step": 9823 }, { "epoch": 1.25, "grad_norm": 0.6963013815243809, "learning_rate": 3.2438295588137293e-06, "loss": 0.5389, "step": 9824 }, { "epoch": 1.25, "grad_norm": 0.7117631549899102, "learning_rate": 3.2428637347380705e-06, "loss": 0.5052, "step": 9825 }, { "epoch": 1.25, "grad_norm": 0.6773070660701833, "learning_rate": 3.2418979854580877e-06, "loss": 0.489, "step": 9826 }, { "epoch": 1.25, "grad_norm": 0.6941230596410103, "learning_rate": 3.240932311014889e-06, "loss": 0.4813, "step": 9827 }, { "epoch": 1.25, "grad_norm": 0.6645045544506429, "learning_rate": 3.2399667114495813e-06, "loss": 0.467, "step": 9828 }, { "epoch": 1.25, "grad_norm": 0.8965847741915173, "learning_rate": 3.239001186803267e-06, "loss": 0.5227, "step": 9829 }, { "epoch": 1.25, "grad_norm": 0.8943685918586383, "learning_rate": 3.238035737117044e-06, "loss": 0.5711, "step": 9830 }, { "epoch": 1.25, "grad_norm": 0.8220622806689967, "learning_rate": 3.23707036243201e-06, "loss": 0.5216, "step": 9831 }, { "epoch": 1.25, "grad_norm": 1.226829334532521, "learning_rate": 3.2361050627892566e-06, "loss": 0.5305, "step": 9832 }, { "epoch": 1.25, "grad_norm": 0.7892365585912237, "learning_rate": 3.2351398382298738e-06, "loss": 0.4717, "step": 9833 }, { "epoch": 1.25, "grad_norm": 0.6274119973413937, "learning_rate": 3.234174688794949e-06, "loss": 0.4727, "step": 9834 }, { "epoch": 1.25, "grad_norm": 0.8326464561556434, "learning_rate": 3.2332096145255646e-06, "loss": 0.4679, "step": 9835 }, { "epoch": 1.25, "grad_norm": 0.6593812566326863, "learning_rate": 3.2322446154628013e-06, "loss": 0.4642, "step": 9836 }, { "epoch": 1.25, "grad_norm": 0.6184796831804252, "learning_rate": 3.231279691647736e-06, "loss": 0.4214, "step": 9837 }, { "epoch": 1.25, "grad_norm": 0.644655182878305, "learning_rate": 3.2303148431214416e-06, "loss": 0.4533, "step": 9838 }, { "epoch": 1.25, "grad_norm": 0.6518594687625032, "learning_rate": 3.229350069924989e-06, "loss": 0.497, "step": 9839 }, { "epoch": 1.25, "grad_norm": 0.6960857625066758, "learning_rate": 3.2283853720994444e-06, "loss": 0.4705, "step": 9840 }, { "epoch": 1.25, "grad_norm": 0.6121445164106195, "learning_rate": 3.2274207496858756e-06, "loss": 0.4997, "step": 9841 }, { "epoch": 1.25, "grad_norm": 0.7472159977548641, "learning_rate": 3.2264562027253402e-06, "loss": 0.4947, "step": 9842 }, { "epoch": 1.25, "grad_norm": 0.6558324530578811, "learning_rate": 3.2254917312588974e-06, "loss": 0.4478, "step": 9843 }, { "epoch": 1.25, "grad_norm": 0.7160388078735583, "learning_rate": 3.224527335327601e-06, "loss": 0.51, "step": 9844 }, { "epoch": 1.25, "grad_norm": 0.7537417454398606, "learning_rate": 3.223563014972502e-06, "loss": 0.5289, "step": 9845 }, { "epoch": 1.25, "grad_norm": 0.6576783491403214, "learning_rate": 3.2225987702346496e-06, "loss": 0.5203, "step": 9846 }, { "epoch": 1.25, "grad_norm": 0.7367553386767534, "learning_rate": 3.221634601155087e-06, "loss": 0.5654, "step": 9847 }, { "epoch": 1.25, "grad_norm": 0.8067918691951811, "learning_rate": 3.2206705077748566e-06, "loss": 0.5808, "step": 9848 }, { "epoch": 1.25, "grad_norm": 0.7643784840076346, "learning_rate": 3.2197064901349975e-06, "loss": 0.5053, "step": 9849 }, { "epoch": 1.25, "grad_norm": 0.7753689071548957, "learning_rate": 3.2187425482765435e-06, "loss": 0.4938, "step": 9850 }, { "epoch": 1.25, "grad_norm": 0.8013427070149979, "learning_rate": 3.2177786822405267e-06, "loss": 0.5056, "step": 9851 }, { "epoch": 1.26, "grad_norm": 0.6690309837945828, "learning_rate": 3.216814892067975e-06, "loss": 0.4412, "step": 9852 }, { "epoch": 1.26, "grad_norm": 0.6420970393285401, "learning_rate": 3.215851177799917e-06, "loss": 0.4808, "step": 9853 }, { "epoch": 1.26, "grad_norm": 0.6855795059049861, "learning_rate": 3.2148875394773727e-06, "loss": 0.5056, "step": 9854 }, { "epoch": 1.26, "grad_norm": 0.713746628553741, "learning_rate": 3.2139239771413616e-06, "loss": 0.4596, "step": 9855 }, { "epoch": 1.26, "grad_norm": 0.7462333994610545, "learning_rate": 3.2129604908328994e-06, "loss": 0.5092, "step": 9856 }, { "epoch": 1.26, "grad_norm": 0.7056654666830612, "learning_rate": 3.2119970805929984e-06, "loss": 0.4592, "step": 9857 }, { "epoch": 1.26, "grad_norm": 0.5793630850885867, "learning_rate": 3.2110337464626685e-06, "loss": 0.4909, "step": 9858 }, { "epoch": 1.26, "grad_norm": 0.628451931771063, "learning_rate": 3.2100704884829147e-06, "loss": 0.4842, "step": 9859 }, { "epoch": 1.26, "grad_norm": 0.6442462346622816, "learning_rate": 3.209107306694741e-06, "loss": 0.5274, "step": 9860 }, { "epoch": 1.26, "grad_norm": 1.6686654545925466, "learning_rate": 3.2081442011391455e-06, "loss": 0.5206, "step": 9861 }, { "epoch": 1.26, "grad_norm": 0.5829889021833834, "learning_rate": 3.207181171857126e-06, "loss": 0.491, "step": 9862 }, { "epoch": 1.26, "grad_norm": 0.7432435731302618, "learning_rate": 3.2062182188896753e-06, "loss": 0.533, "step": 9863 }, { "epoch": 1.26, "grad_norm": 0.7329897402527397, "learning_rate": 3.20525534227778e-06, "loss": 0.5442, "step": 9864 }, { "epoch": 1.26, "grad_norm": 0.7453179893366322, "learning_rate": 3.204292542062432e-06, "loss": 0.5029, "step": 9865 }, { "epoch": 1.26, "grad_norm": 0.6339286054803679, "learning_rate": 3.2033298182846122e-06, "loss": 0.4973, "step": 9866 }, { "epoch": 1.26, "grad_norm": 0.7891009532643618, "learning_rate": 3.202367170985301e-06, "loss": 0.4952, "step": 9867 }, { "epoch": 1.26, "grad_norm": 0.5489119562476963, "learning_rate": 3.201404600205475e-06, "loss": 0.4355, "step": 9868 }, { "epoch": 1.26, "grad_norm": 0.697021613953802, "learning_rate": 3.2004421059861073e-06, "loss": 0.4533, "step": 9869 }, { "epoch": 1.26, "grad_norm": 0.5738472127371834, "learning_rate": 3.199479688368169e-06, "loss": 0.4254, "step": 9870 }, { "epoch": 1.26, "grad_norm": 0.7585878860833483, "learning_rate": 3.1985173473926263e-06, "loss": 0.5234, "step": 9871 }, { "epoch": 1.26, "grad_norm": 0.7739078301838713, "learning_rate": 3.197555083100443e-06, "loss": 0.5388, "step": 9872 }, { "epoch": 1.26, "grad_norm": 0.6693094729705964, "learning_rate": 3.196592895532581e-06, "loss": 0.4792, "step": 9873 }, { "epoch": 1.26, "grad_norm": 0.5724523216615389, "learning_rate": 3.195630784729996e-06, "loss": 0.4429, "step": 9874 }, { "epoch": 1.26, "grad_norm": 0.6009674516129981, "learning_rate": 3.1946687507336428e-06, "loss": 0.5281, "step": 9875 }, { "epoch": 1.26, "grad_norm": 0.8148113779397542, "learning_rate": 3.1937067935844718e-06, "loss": 0.553, "step": 9876 }, { "epoch": 1.26, "grad_norm": 0.7831115003604711, "learning_rate": 3.1927449133234295e-06, "loss": 0.5515, "step": 9877 }, { "epoch": 1.26, "grad_norm": 0.6437030484691998, "learning_rate": 3.191783109991462e-06, "loss": 0.4482, "step": 9878 }, { "epoch": 1.26, "grad_norm": 0.5867952603655965, "learning_rate": 3.1908213836295083e-06, "loss": 0.4757, "step": 9879 }, { "epoch": 1.26, "grad_norm": 0.596781784339686, "learning_rate": 3.1898597342785074e-06, "loss": 0.4097, "step": 9880 }, { "epoch": 1.26, "grad_norm": 0.7115799260134911, "learning_rate": 3.1888981619793946e-06, "loss": 0.5045, "step": 9881 }, { "epoch": 1.26, "grad_norm": 0.7465189151037821, "learning_rate": 3.1879366667730986e-06, "loss": 0.5245, "step": 9882 }, { "epoch": 1.26, "grad_norm": 0.7501013878291433, "learning_rate": 3.1869752487005485e-06, "loss": 0.4648, "step": 9883 }, { "epoch": 1.26, "grad_norm": 0.6384203260918596, "learning_rate": 3.1860139078026686e-06, "loss": 0.4206, "step": 9884 }, { "epoch": 1.26, "grad_norm": 0.5984569149862544, "learning_rate": 3.1850526441203794e-06, "loss": 0.5041, "step": 9885 }, { "epoch": 1.26, "grad_norm": 0.7459038381909749, "learning_rate": 3.1840914576945996e-06, "loss": 0.5156, "step": 9886 }, { "epoch": 1.26, "grad_norm": 0.5813552143409968, "learning_rate": 3.183130348566244e-06, "loss": 0.4494, "step": 9887 }, { "epoch": 1.26, "grad_norm": 0.7442813452056939, "learning_rate": 3.1821693167762234e-06, "loss": 0.4581, "step": 9888 }, { "epoch": 1.26, "grad_norm": 0.732492200114443, "learning_rate": 3.181208362365445e-06, "loss": 0.5736, "step": 9889 }, { "epoch": 1.26, "grad_norm": 0.7917636361984168, "learning_rate": 3.1802474853748157e-06, "loss": 0.5086, "step": 9890 }, { "epoch": 1.26, "grad_norm": 0.665647202621241, "learning_rate": 3.179286685845235e-06, "loss": 0.4554, "step": 9891 }, { "epoch": 1.26, "grad_norm": 0.563271799879308, "learning_rate": 3.178325963817603e-06, "loss": 0.4229, "step": 9892 }, { "epoch": 1.26, "grad_norm": 0.7276607686629478, "learning_rate": 3.1773653193328136e-06, "loss": 0.467, "step": 9893 }, { "epoch": 1.26, "grad_norm": 0.6964931286636661, "learning_rate": 3.1764047524317578e-06, "loss": 0.4941, "step": 9894 }, { "epoch": 1.26, "grad_norm": 0.7717033031170665, "learning_rate": 3.175444263155324e-06, "loss": 0.5581, "step": 9895 }, { "epoch": 1.26, "grad_norm": 0.5420426352527334, "learning_rate": 3.1744838515443976e-06, "loss": 0.4318, "step": 9896 }, { "epoch": 1.26, "grad_norm": 0.7670061772366024, "learning_rate": 3.1735235176398604e-06, "loss": 0.4788, "step": 9897 }, { "epoch": 1.26, "grad_norm": 0.7983310279508355, "learning_rate": 3.17256326148259e-06, "loss": 0.5207, "step": 9898 }, { "epoch": 1.26, "grad_norm": 0.8397255911643894, "learning_rate": 3.1716030831134627e-06, "loss": 0.4903, "step": 9899 }, { "epoch": 1.26, "grad_norm": 0.7256707202020258, "learning_rate": 3.1706429825733486e-06, "loss": 0.4844, "step": 9900 }, { "epoch": 1.26, "grad_norm": 0.5689093431868085, "learning_rate": 3.1696829599031176e-06, "loss": 0.4266, "step": 9901 }, { "epoch": 1.26, "grad_norm": 0.6262273340470185, "learning_rate": 3.1687230151436322e-06, "loss": 0.4583, "step": 9902 }, { "epoch": 1.26, "grad_norm": 0.670657683213048, "learning_rate": 3.1677631483357572e-06, "loss": 0.4738, "step": 9903 }, { "epoch": 1.26, "grad_norm": 0.6337897363984735, "learning_rate": 3.1668033595203505e-06, "loss": 0.4749, "step": 9904 }, { "epoch": 1.26, "grad_norm": 0.6891063871269649, "learning_rate": 3.165843648738267e-06, "loss": 0.4953, "step": 9905 }, { "epoch": 1.26, "grad_norm": 0.5968432218174194, "learning_rate": 3.1648840160303585e-06, "loss": 0.4714, "step": 9906 }, { "epoch": 1.26, "grad_norm": 0.8001986283879055, "learning_rate": 3.163924461437472e-06, "loss": 0.5573, "step": 9907 }, { "epoch": 1.26, "grad_norm": 0.7215817195717713, "learning_rate": 3.1629649850004553e-06, "loss": 0.5606, "step": 9908 }, { "epoch": 1.26, "grad_norm": 0.6206732567557802, "learning_rate": 3.1620055867601487e-06, "loss": 0.4517, "step": 9909 }, { "epoch": 1.26, "grad_norm": 0.7091691165945644, "learning_rate": 3.16104626675739e-06, "loss": 0.5157, "step": 9910 }, { "epoch": 1.26, "grad_norm": 0.7731843108203841, "learning_rate": 3.1600870250330155e-06, "loss": 0.5627, "step": 9911 }, { "epoch": 1.26, "grad_norm": 0.8672547607346605, "learning_rate": 3.1591278616278574e-06, "loss": 0.5336, "step": 9912 }, { "epoch": 1.26, "grad_norm": 0.795869415216268, "learning_rate": 3.158168776582743e-06, "loss": 0.5074, "step": 9913 }, { "epoch": 1.26, "grad_norm": 0.6715420976532219, "learning_rate": 3.157209769938496e-06, "loss": 0.4916, "step": 9914 }, { "epoch": 1.26, "grad_norm": 0.607640260436024, "learning_rate": 3.1562508417359433e-06, "loss": 0.462, "step": 9915 }, { "epoch": 1.26, "grad_norm": 0.7417548998133111, "learning_rate": 3.1552919920159e-06, "loss": 0.5082, "step": 9916 }, { "epoch": 1.26, "grad_norm": 0.6717004473914451, "learning_rate": 3.154333220819182e-06, "loss": 0.5019, "step": 9917 }, { "epoch": 1.26, "grad_norm": 0.6181879367924511, "learning_rate": 3.1533745281866e-06, "loss": 0.4976, "step": 9918 }, { "epoch": 1.26, "grad_norm": 0.7119358765643872, "learning_rate": 3.1524159141589644e-06, "loss": 0.4902, "step": 9919 }, { "epoch": 1.26, "grad_norm": 0.7530063034176033, "learning_rate": 3.1514573787770786e-06, "loss": 0.5051, "step": 9920 }, { "epoch": 1.26, "grad_norm": 0.7154305445386362, "learning_rate": 3.1504989220817457e-06, "loss": 0.5079, "step": 9921 }, { "epoch": 1.26, "grad_norm": 0.7660282096299629, "learning_rate": 3.1495405441137627e-06, "loss": 0.5076, "step": 9922 }, { "epoch": 1.26, "grad_norm": 0.6026131679881637, "learning_rate": 3.1485822449139263e-06, "loss": 0.4755, "step": 9923 }, { "epoch": 1.26, "grad_norm": 0.7836022467326218, "learning_rate": 3.1476240245230272e-06, "loss": 0.5113, "step": 9924 }, { "epoch": 1.26, "grad_norm": 0.6876687248022918, "learning_rate": 3.1466658829818543e-06, "loss": 0.4817, "step": 9925 }, { "epoch": 1.26, "grad_norm": 0.6415848110272832, "learning_rate": 3.1457078203311897e-06, "loss": 0.4566, "step": 9926 }, { "epoch": 1.26, "grad_norm": 0.7808714798928587, "learning_rate": 3.14474983661182e-06, "loss": 0.5687, "step": 9927 }, { "epoch": 1.26, "grad_norm": 0.7250699538423234, "learning_rate": 3.1437919318645216e-06, "loss": 0.5137, "step": 9928 }, { "epoch": 1.26, "grad_norm": 0.7075694266545647, "learning_rate": 3.1428341061300687e-06, "loss": 0.4797, "step": 9929 }, { "epoch": 1.27, "grad_norm": 0.5473386506540765, "learning_rate": 3.141876359449233e-06, "loss": 0.4402, "step": 9930 }, { "epoch": 1.27, "grad_norm": 0.7825510789290535, "learning_rate": 3.140918691862783e-06, "loss": 0.5629, "step": 9931 }, { "epoch": 1.27, "grad_norm": 0.880621228415058, "learning_rate": 3.139961103411484e-06, "loss": 0.4939, "step": 9932 }, { "epoch": 1.27, "grad_norm": 1.1042619234925284, "learning_rate": 3.1390035941360975e-06, "loss": 0.5651, "step": 9933 }, { "epoch": 1.27, "grad_norm": 0.6925194000489686, "learning_rate": 3.1380461640773803e-06, "loss": 0.5025, "step": 9934 }, { "epoch": 1.27, "grad_norm": 0.6254911281763132, "learning_rate": 3.137088813276088e-06, "loss": 0.4415, "step": 9935 }, { "epoch": 1.27, "grad_norm": 0.6445112189690168, "learning_rate": 3.136131541772972e-06, "loss": 0.4999, "step": 9936 }, { "epoch": 1.27, "grad_norm": 0.5818518250465591, "learning_rate": 3.13517434960878e-06, "loss": 0.4901, "step": 9937 }, { "epoch": 1.27, "grad_norm": 0.7642835612508834, "learning_rate": 3.134217236824257e-06, "loss": 0.5598, "step": 9938 }, { "epoch": 1.27, "grad_norm": 0.663430748715996, "learning_rate": 3.133260203460143e-06, "loss": 0.5314, "step": 9939 }, { "epoch": 1.27, "grad_norm": 0.623174695249876, "learning_rate": 3.1323032495571777e-06, "loss": 0.4703, "step": 9940 }, { "epoch": 1.27, "grad_norm": 0.6523327564812906, "learning_rate": 3.1313463751560935e-06, "loss": 0.4644, "step": 9941 }, { "epoch": 1.27, "grad_norm": 1.0993333002394308, "learning_rate": 3.1303895802976245e-06, "loss": 0.5285, "step": 9942 }, { "epoch": 1.27, "grad_norm": 0.6651099934805494, "learning_rate": 3.1294328650224965e-06, "loss": 0.506, "step": 9943 }, { "epoch": 1.27, "grad_norm": 0.6657817185498772, "learning_rate": 3.128476229371433e-06, "loss": 0.4638, "step": 9944 }, { "epoch": 1.27, "grad_norm": 0.6483782927121519, "learning_rate": 3.1275196733851575e-06, "loss": 0.4324, "step": 9945 }, { "epoch": 1.27, "grad_norm": 0.7462880908031876, "learning_rate": 3.1265631971043854e-06, "loss": 0.5101, "step": 9946 }, { "epoch": 1.27, "grad_norm": 0.7695180921923955, "learning_rate": 3.1256068005698303e-06, "loss": 0.4851, "step": 9947 }, { "epoch": 1.27, "grad_norm": 0.5995232463677926, "learning_rate": 3.1246504838222047e-06, "loss": 0.447, "step": 9948 }, { "epoch": 1.27, "grad_norm": 0.7071339803035661, "learning_rate": 3.123694246902216e-06, "loss": 0.4382, "step": 9949 }, { "epoch": 1.27, "grad_norm": 0.8026421358645626, "learning_rate": 3.1227380898505665e-06, "loss": 0.499, "step": 9950 }, { "epoch": 1.27, "grad_norm": 0.5760689060702163, "learning_rate": 3.121782012707957e-06, "loss": 0.4422, "step": 9951 }, { "epoch": 1.27, "grad_norm": 0.6093306136261478, "learning_rate": 3.120826015515086e-06, "loss": 0.4871, "step": 9952 }, { "epoch": 1.27, "grad_norm": 0.7926944690816904, "learning_rate": 3.1198700983126464e-06, "loss": 0.5273, "step": 9953 }, { "epoch": 1.27, "grad_norm": 0.6148644236290115, "learning_rate": 3.1189142611413286e-06, "loss": 0.469, "step": 9954 }, { "epoch": 1.27, "grad_norm": 0.827937456644679, "learning_rate": 3.1179585040418204e-06, "loss": 0.4898, "step": 9955 }, { "epoch": 1.27, "grad_norm": 0.60952679725037, "learning_rate": 3.1170028270548036e-06, "loss": 0.44, "step": 9956 }, { "epoch": 1.27, "grad_norm": 0.6197475712043369, "learning_rate": 3.11604723022096e-06, "loss": 0.4278, "step": 9957 }, { "epoch": 1.27, "grad_norm": 0.5446321286627389, "learning_rate": 3.1150917135809654e-06, "loss": 0.4496, "step": 9958 }, { "epoch": 1.27, "grad_norm": 0.9763700108494906, "learning_rate": 3.114136277175493e-06, "loss": 0.4882, "step": 9959 }, { "epoch": 1.27, "grad_norm": 0.5451786417747946, "learning_rate": 3.113180921045213e-06, "loss": 0.4078, "step": 9960 }, { "epoch": 1.27, "grad_norm": 0.5711121002153202, "learning_rate": 3.112225645230792e-06, "loss": 0.4433, "step": 9961 }, { "epoch": 1.27, "grad_norm": 0.6564735920284965, "learning_rate": 3.111270449772892e-06, "loss": 0.4842, "step": 9962 }, { "epoch": 1.27, "grad_norm": 0.720590846859446, "learning_rate": 3.1103153347121743e-06, "loss": 0.5187, "step": 9963 }, { "epoch": 1.27, "grad_norm": 0.6874548830380423, "learning_rate": 3.1093603000892923e-06, "loss": 0.5016, "step": 9964 }, { "epoch": 1.27, "grad_norm": 0.7900460478602355, "learning_rate": 3.1084053459449025e-06, "loss": 0.5182, "step": 9965 }, { "epoch": 1.27, "grad_norm": 0.7711855452182235, "learning_rate": 3.107450472319652e-06, "loss": 0.5315, "step": 9966 }, { "epoch": 1.27, "grad_norm": 0.612830530676528, "learning_rate": 3.1064956792541877e-06, "loss": 0.4986, "step": 9967 }, { "epoch": 1.27, "grad_norm": 0.574623794892652, "learning_rate": 3.1055409667891513e-06, "loss": 0.4765, "step": 9968 }, { "epoch": 1.27, "grad_norm": 0.6902338288936075, "learning_rate": 3.1045863349651827e-06, "loss": 0.4469, "step": 9969 }, { "epoch": 1.27, "grad_norm": 0.5685269148314458, "learning_rate": 3.1036317838229162e-06, "loss": 0.4537, "step": 9970 }, { "epoch": 1.27, "grad_norm": 0.5456361567034992, "learning_rate": 3.1026773134029854e-06, "loss": 0.4541, "step": 9971 }, { "epoch": 1.27, "grad_norm": 0.6217646636411817, "learning_rate": 3.1017229237460176e-06, "loss": 0.4837, "step": 9972 }, { "epoch": 1.27, "grad_norm": 1.2924020366665425, "learning_rate": 3.1007686148926396e-06, "loss": 0.4837, "step": 9973 }, { "epoch": 1.27, "grad_norm": 0.6089581590918436, "learning_rate": 3.0998143868834717e-06, "loss": 0.4703, "step": 9974 }, { "epoch": 1.27, "grad_norm": 0.7653251331614317, "learning_rate": 3.0988602397591338e-06, "loss": 0.482, "step": 9975 }, { "epoch": 1.27, "grad_norm": 0.5371536887762518, "learning_rate": 3.097906173560239e-06, "loss": 0.4225, "step": 9976 }, { "epoch": 1.27, "grad_norm": 0.7761111294942714, "learning_rate": 3.096952188327401e-06, "loss": 0.4499, "step": 9977 }, { "epoch": 1.27, "grad_norm": 0.6802528396942026, "learning_rate": 3.0959982841012283e-06, "loss": 0.4741, "step": 9978 }, { "epoch": 1.27, "grad_norm": 0.6928241219339091, "learning_rate": 3.095044460922323e-06, "loss": 0.4772, "step": 9979 }, { "epoch": 1.27, "grad_norm": 0.7108420253509063, "learning_rate": 3.0940907188312885e-06, "loss": 0.4493, "step": 9980 }, { "epoch": 1.27, "grad_norm": 0.602090956177948, "learning_rate": 3.0931370578687214e-06, "loss": 0.4959, "step": 9981 }, { "epoch": 1.27, "grad_norm": 0.6395492350721127, "learning_rate": 3.0921834780752163e-06, "loss": 0.5153, "step": 9982 }, { "epoch": 1.27, "grad_norm": 0.601243451801059, "learning_rate": 3.0912299794913636e-06, "loss": 0.5149, "step": 9983 }, { "epoch": 1.27, "grad_norm": 0.956703778109413, "learning_rate": 3.090276562157752e-06, "loss": 0.5875, "step": 9984 }, { "epoch": 1.27, "grad_norm": 0.7156525862103044, "learning_rate": 3.089323226114964e-06, "loss": 0.5059, "step": 9985 }, { "epoch": 1.27, "grad_norm": 0.6957222003615686, "learning_rate": 3.0883699714035807e-06, "loss": 0.4119, "step": 9986 }, { "epoch": 1.27, "grad_norm": 0.583525071761603, "learning_rate": 3.087416798064179e-06, "loss": 0.4592, "step": 9987 }, { "epoch": 1.27, "grad_norm": 0.6550509721118933, "learning_rate": 3.0864637061373306e-06, "loss": 0.4695, "step": 9988 }, { "epoch": 1.27, "grad_norm": 0.6899522372675596, "learning_rate": 3.0855106956636095e-06, "loss": 0.5119, "step": 9989 }, { "epoch": 1.27, "grad_norm": 0.6310101863850072, "learning_rate": 3.08455776668358e-06, "loss": 0.4036, "step": 9990 }, { "epoch": 1.27, "grad_norm": 0.6086763255538692, "learning_rate": 3.083604919237806e-06, "loss": 0.4114, "step": 9991 }, { "epoch": 1.27, "grad_norm": 0.6767013629384223, "learning_rate": 3.082652153366846e-06, "loss": 0.4504, "step": 9992 }, { "epoch": 1.27, "grad_norm": 0.6882547989452196, "learning_rate": 3.0816994691112567e-06, "loss": 0.4779, "step": 9993 }, { "epoch": 1.27, "grad_norm": 0.7646097099019413, "learning_rate": 3.0807468665115913e-06, "loss": 0.5501, "step": 9994 }, { "epoch": 1.27, "grad_norm": 0.5742321009665137, "learning_rate": 3.079794345608399e-06, "loss": 0.4549, "step": 9995 }, { "epoch": 1.27, "grad_norm": 0.5053944630526432, "learning_rate": 3.0788419064422256e-06, "loss": 0.3972, "step": 9996 }, { "epoch": 1.27, "grad_norm": 0.5703044952933406, "learning_rate": 3.0778895490536124e-06, "loss": 0.4103, "step": 9997 }, { "epoch": 1.27, "grad_norm": 0.7749884170205388, "learning_rate": 3.0769372734830994e-06, "loss": 0.5116, "step": 9998 }, { "epoch": 1.27, "grad_norm": 0.8219816101359045, "learning_rate": 3.075985079771221e-06, "loss": 0.5343, "step": 9999 }, { "epoch": 1.27, "grad_norm": 0.7689351467617338, "learning_rate": 3.07503296795851e-06, "loss": 0.5247, "step": 10000 }, { "epoch": 1.27, "grad_norm": 0.7387239325911442, "learning_rate": 3.074080938085493e-06, "loss": 0.4952, "step": 10001 }, { "epoch": 1.27, "grad_norm": 1.0615549005643687, "learning_rate": 3.0731289901926964e-06, "loss": 0.5688, "step": 10002 }, { "epoch": 1.27, "grad_norm": 0.7533877985632432, "learning_rate": 3.072177124320641e-06, "loss": 0.5156, "step": 10003 }, { "epoch": 1.27, "grad_norm": 0.7618768628850636, "learning_rate": 3.0712253405098456e-06, "loss": 0.5744, "step": 10004 }, { "epoch": 1.27, "grad_norm": 0.7585952723183147, "learning_rate": 3.0702736388008247e-06, "loss": 0.5398, "step": 10005 }, { "epoch": 1.27, "grad_norm": 0.7553428966500044, "learning_rate": 3.0693220192340876e-06, "loss": 0.5206, "step": 10006 }, { "epoch": 1.27, "grad_norm": 0.6108437038033367, "learning_rate": 3.0683704818501438e-06, "loss": 0.48, "step": 10007 }, { "epoch": 1.27, "grad_norm": 0.8480712547236272, "learning_rate": 3.067419026689495e-06, "loss": 0.5152, "step": 10008 }, { "epoch": 1.28, "grad_norm": 0.7075026472470567, "learning_rate": 3.066467653792643e-06, "loss": 0.4912, "step": 10009 }, { "epoch": 1.28, "grad_norm": 0.7245669215115459, "learning_rate": 3.065516363200084e-06, "loss": 0.4735, "step": 10010 }, { "epoch": 1.28, "grad_norm": 0.7473986085675763, "learning_rate": 3.0645651549523114e-06, "loss": 0.5026, "step": 10011 }, { "epoch": 1.28, "grad_norm": 0.5899756990735727, "learning_rate": 3.0636140290898166e-06, "loss": 0.4779, "step": 10012 }, { "epoch": 1.28, "grad_norm": 0.8824406665943079, "learning_rate": 3.062662985653082e-06, "loss": 0.518, "step": 10013 }, { "epoch": 1.28, "grad_norm": 0.713828159259407, "learning_rate": 3.061712024682596e-06, "loss": 0.5213, "step": 10014 }, { "epoch": 1.28, "grad_norm": 0.7137738184450169, "learning_rate": 3.0607611462188342e-06, "loss": 0.4667, "step": 10015 }, { "epoch": 1.28, "grad_norm": 0.6984211698335809, "learning_rate": 3.0598103503022734e-06, "loss": 0.4949, "step": 10016 }, { "epoch": 1.28, "grad_norm": 0.5803382187767104, "learning_rate": 3.0588596369733863e-06, "loss": 0.4371, "step": 10017 }, { "epoch": 1.28, "grad_norm": 0.704751079108312, "learning_rate": 3.0579090062726415e-06, "loss": 0.5002, "step": 10018 }, { "epoch": 1.28, "grad_norm": 0.7865243535100296, "learning_rate": 3.0569584582405044e-06, "loss": 0.5106, "step": 10019 }, { "epoch": 1.28, "grad_norm": 0.6013805575762681, "learning_rate": 3.056007992917437e-06, "loss": 0.4607, "step": 10020 }, { "epoch": 1.28, "grad_norm": 0.6338076601598049, "learning_rate": 3.0550576103438966e-06, "loss": 0.4666, "step": 10021 }, { "epoch": 1.28, "grad_norm": 0.8577282948578187, "learning_rate": 3.0541073105603387e-06, "loss": 0.5421, "step": 10022 }, { "epoch": 1.28, "grad_norm": 0.7320387177694757, "learning_rate": 3.053157093607214e-06, "loss": 0.5511, "step": 10023 }, { "epoch": 1.28, "grad_norm": 0.6883125579858914, "learning_rate": 3.0522069595249717e-06, "loss": 0.4824, "step": 10024 }, { "epoch": 1.28, "grad_norm": 0.6183742614961625, "learning_rate": 3.0512569083540537e-06, "loss": 0.4445, "step": 10025 }, { "epoch": 1.28, "grad_norm": 0.7261391838529112, "learning_rate": 3.0503069401349006e-06, "loss": 0.5036, "step": 10026 }, { "epoch": 1.28, "grad_norm": 0.651606948081618, "learning_rate": 3.0493570549079522e-06, "loss": 0.4829, "step": 10027 }, { "epoch": 1.28, "grad_norm": 0.676641055208218, "learning_rate": 3.048407252713641e-06, "loss": 0.513, "step": 10028 }, { "epoch": 1.28, "grad_norm": 0.7831625782380829, "learning_rate": 3.0474575335923967e-06, "loss": 0.5288, "step": 10029 }, { "epoch": 1.28, "grad_norm": 0.7136661129400382, "learning_rate": 3.046507897584645e-06, "loss": 0.5211, "step": 10030 }, { "epoch": 1.28, "grad_norm": 0.8144461603238857, "learning_rate": 3.0455583447308105e-06, "loss": 0.4883, "step": 10031 }, { "epoch": 1.28, "grad_norm": 0.7931373990728641, "learning_rate": 3.0446088750713123e-06, "loss": 0.5091, "step": 10032 }, { "epoch": 1.28, "grad_norm": 0.5697133546601547, "learning_rate": 3.043659488646564e-06, "loss": 0.4389, "step": 10033 }, { "epoch": 1.28, "grad_norm": 0.5604022302195278, "learning_rate": 3.042710185496981e-06, "loss": 0.437, "step": 10034 }, { "epoch": 1.28, "grad_norm": 0.641352831044806, "learning_rate": 3.041760965662971e-06, "loss": 0.4454, "step": 10035 }, { "epoch": 1.28, "grad_norm": 0.8715159717674851, "learning_rate": 3.040811829184938e-06, "loss": 0.5269, "step": 10036 }, { "epoch": 1.28, "grad_norm": 0.8903074305787414, "learning_rate": 3.039862776103285e-06, "loss": 0.4897, "step": 10037 }, { "epoch": 1.28, "grad_norm": 0.9696778903792544, "learning_rate": 3.0389138064584084e-06, "loss": 0.5127, "step": 10038 }, { "epoch": 1.28, "grad_norm": 0.6020712900758779, "learning_rate": 3.037964920290706e-06, "loss": 0.4542, "step": 10039 }, { "epoch": 1.28, "grad_norm": 0.7057177378866583, "learning_rate": 3.0370161176405676e-06, "loss": 0.4966, "step": 10040 }, { "epoch": 1.28, "grad_norm": 0.8297266839166492, "learning_rate": 3.0360673985483803e-06, "loss": 0.5246, "step": 10041 }, { "epoch": 1.28, "grad_norm": 0.5756645173131348, "learning_rate": 3.0351187630545277e-06, "loss": 0.4836, "step": 10042 }, { "epoch": 1.28, "grad_norm": 0.7288508495495228, "learning_rate": 3.03417021119939e-06, "loss": 0.5216, "step": 10043 }, { "epoch": 1.28, "grad_norm": 0.6407747002090415, "learning_rate": 3.0332217430233453e-06, "loss": 0.4732, "step": 10044 }, { "epoch": 1.28, "grad_norm": 0.8804538125993043, "learning_rate": 3.0322733585667653e-06, "loss": 0.4997, "step": 10045 }, { "epoch": 1.28, "grad_norm": 0.6284289269867019, "learning_rate": 3.0313250578700217e-06, "loss": 0.4698, "step": 10046 }, { "epoch": 1.28, "grad_norm": 0.7854980850057762, "learning_rate": 3.030376840973478e-06, "loss": 0.5258, "step": 10047 }, { "epoch": 1.28, "grad_norm": 0.7068188670250871, "learning_rate": 3.0294287079174987e-06, "loss": 0.5143, "step": 10048 }, { "epoch": 1.28, "grad_norm": 0.7952001354525379, "learning_rate": 3.0284806587424424e-06, "loss": 0.5979, "step": 10049 }, { "epoch": 1.28, "grad_norm": 0.7092844791672158, "learning_rate": 3.0275326934886628e-06, "loss": 0.4552, "step": 10050 }, { "epoch": 1.28, "grad_norm": 0.6795123172734648, "learning_rate": 3.0265848121965145e-06, "loss": 0.507, "step": 10051 }, { "epoch": 1.28, "grad_norm": 0.6838918044613908, "learning_rate": 3.0256370149063453e-06, "loss": 0.5623, "step": 10052 }, { "epoch": 1.28, "grad_norm": 0.8283735266829618, "learning_rate": 3.0246893016584987e-06, "loss": 0.525, "step": 10053 }, { "epoch": 1.28, "grad_norm": 0.8431487495445705, "learning_rate": 3.0237416724933166e-06, "loss": 0.5078, "step": 10054 }, { "epoch": 1.28, "grad_norm": 0.618514115349961, "learning_rate": 3.022794127451136e-06, "loss": 0.4861, "step": 10055 }, { "epoch": 1.28, "grad_norm": 0.7774048537524458, "learning_rate": 3.021846666572291e-06, "loss": 0.507, "step": 10056 }, { "epoch": 1.28, "grad_norm": 0.6343040788754329, "learning_rate": 3.020899289897113e-06, "loss": 0.4459, "step": 10057 }, { "epoch": 1.28, "grad_norm": 0.6149030932862146, "learning_rate": 3.0199519974659276e-06, "loss": 0.4797, "step": 10058 }, { "epoch": 1.28, "grad_norm": 0.8142902832891326, "learning_rate": 3.0190047893190575e-06, "loss": 0.5064, "step": 10059 }, { "epoch": 1.28, "grad_norm": 0.5875697442357543, "learning_rate": 3.0180576654968242e-06, "loss": 0.4749, "step": 10060 }, { "epoch": 1.28, "grad_norm": 0.5807271333060996, "learning_rate": 3.017110626039542e-06, "loss": 0.4328, "step": 10061 }, { "epoch": 1.28, "grad_norm": 0.8156702292407569, "learning_rate": 3.0161636709875245e-06, "loss": 0.4791, "step": 10062 }, { "epoch": 1.28, "grad_norm": 0.8883279805108075, "learning_rate": 3.0152168003810788e-06, "loss": 0.5289, "step": 10063 }, { "epoch": 1.28, "grad_norm": 0.7966769391439963, "learning_rate": 3.014270014260513e-06, "loss": 0.5244, "step": 10064 }, { "epoch": 1.28, "grad_norm": 1.303583516155905, "learning_rate": 3.013323312666126e-06, "loss": 0.4876, "step": 10065 }, { "epoch": 1.28, "grad_norm": 0.58675524657906, "learning_rate": 3.0123766956382177e-06, "loss": 0.4469, "step": 10066 }, { "epoch": 1.28, "grad_norm": 0.7589867412843092, "learning_rate": 3.011430163217082e-06, "loss": 0.586, "step": 10067 }, { "epoch": 1.28, "grad_norm": 0.7693628082801335, "learning_rate": 3.01048371544301e-06, "loss": 0.5209, "step": 10068 }, { "epoch": 1.28, "grad_norm": 0.5944715712428372, "learning_rate": 3.009537352356289e-06, "loss": 0.4545, "step": 10069 }, { "epoch": 1.28, "grad_norm": 0.7005635876539956, "learning_rate": 3.0085910739972013e-06, "loss": 0.4282, "step": 10070 }, { "epoch": 1.28, "grad_norm": 0.8225769212919801, "learning_rate": 3.0076448804060286e-06, "loss": 0.5229, "step": 10071 }, { "epoch": 1.28, "grad_norm": 1.0036227321852822, "learning_rate": 3.006698771623046e-06, "loss": 0.472, "step": 10072 }, { "epoch": 1.28, "grad_norm": 0.5925436969464989, "learning_rate": 3.005752747688528e-06, "loss": 0.44, "step": 10073 }, { "epoch": 1.28, "grad_norm": 0.6921521343201524, "learning_rate": 3.004806808642742e-06, "loss": 0.463, "step": 10074 }, { "epoch": 1.28, "grad_norm": 0.60239773074461, "learning_rate": 3.003860954525954e-06, "loss": 0.4626, "step": 10075 }, { "epoch": 1.28, "grad_norm": 0.7077316777280128, "learning_rate": 3.002915185378427e-06, "loss": 0.5023, "step": 10076 }, { "epoch": 1.28, "grad_norm": 0.7522666144284539, "learning_rate": 3.0019695012404193e-06, "loss": 0.5559, "step": 10077 }, { "epoch": 1.28, "grad_norm": 0.8185355937073601, "learning_rate": 3.001023902152185e-06, "loss": 0.5093, "step": 10078 }, { "epoch": 1.28, "grad_norm": 0.9198465109077911, "learning_rate": 3.0000783881539747e-06, "loss": 0.5868, "step": 10079 }, { "epoch": 1.28, "grad_norm": 0.8550110783014858, "learning_rate": 2.9991329592860376e-06, "loss": 0.5043, "step": 10080 }, { "epoch": 1.28, "grad_norm": 0.734583464318067, "learning_rate": 2.998187615588616e-06, "loss": 0.4729, "step": 10081 }, { "epoch": 1.28, "grad_norm": 0.6177460097427718, "learning_rate": 2.9972423571019503e-06, "loss": 0.4996, "step": 10082 }, { "epoch": 1.28, "grad_norm": 0.9354631800349099, "learning_rate": 2.996297183866278e-06, "loss": 0.5243, "step": 10083 }, { "epoch": 1.28, "grad_norm": 0.9388552029761114, "learning_rate": 2.995352095921832e-06, "loss": 0.4607, "step": 10084 }, { "epoch": 1.28, "grad_norm": 0.673897873608996, "learning_rate": 2.9944070933088408e-06, "loss": 0.5089, "step": 10085 }, { "epoch": 1.28, "grad_norm": 0.7970155534274046, "learning_rate": 2.9934621760675308e-06, "loss": 0.5325, "step": 10086 }, { "epoch": 1.29, "grad_norm": 0.6616889414459125, "learning_rate": 2.992517344238124e-06, "loss": 0.4741, "step": 10087 }, { "epoch": 1.29, "grad_norm": 0.6796734364489979, "learning_rate": 2.991572597860837e-06, "loss": 0.4344, "step": 10088 }, { "epoch": 1.29, "grad_norm": 0.7216796163782366, "learning_rate": 2.990627936975888e-06, "loss": 0.4914, "step": 10089 }, { "epoch": 1.29, "grad_norm": 0.7493563605490247, "learning_rate": 2.989683361623488e-06, "loss": 0.4512, "step": 10090 }, { "epoch": 1.29, "grad_norm": 0.6755188370213615, "learning_rate": 2.988738871843842e-06, "loss": 0.4529, "step": 10091 }, { "epoch": 1.29, "grad_norm": 0.9182093900054465, "learning_rate": 2.9877944676771554e-06, "loss": 0.5545, "step": 10092 }, { "epoch": 1.29, "grad_norm": 1.1928959872543183, "learning_rate": 2.986850149163628e-06, "loss": 0.5176, "step": 10093 }, { "epoch": 1.29, "grad_norm": 0.737471400068593, "learning_rate": 2.985905916343458e-06, "loss": 0.5067, "step": 10094 }, { "epoch": 1.29, "grad_norm": 0.6022972409017776, "learning_rate": 2.9849617692568356e-06, "loss": 0.4609, "step": 10095 }, { "epoch": 1.29, "grad_norm": 0.8303501827301287, "learning_rate": 2.984017707943952e-06, "loss": 0.5094, "step": 10096 }, { "epoch": 1.29, "grad_norm": 0.876549585807127, "learning_rate": 2.983073732444992e-06, "loss": 0.5175, "step": 10097 }, { "epoch": 1.29, "grad_norm": 0.594992338486311, "learning_rate": 2.982129842800139e-06, "loss": 0.4495, "step": 10098 }, { "epoch": 1.29, "grad_norm": 0.7201306166707563, "learning_rate": 2.9811860390495694e-06, "loss": 0.4328, "step": 10099 }, { "epoch": 1.29, "grad_norm": 0.8887686741218395, "learning_rate": 2.9802423212334575e-06, "loss": 0.4785, "step": 10100 }, { "epoch": 1.29, "grad_norm": 0.8867668237412709, "learning_rate": 2.9792986893919774e-06, "loss": 0.5371, "step": 10101 }, { "epoch": 1.29, "grad_norm": 0.9830089174735464, "learning_rate": 2.9783551435652947e-06, "loss": 0.467, "step": 10102 }, { "epoch": 1.29, "grad_norm": 0.5411161149393208, "learning_rate": 2.977411683793574e-06, "loss": 0.4248, "step": 10103 }, { "epoch": 1.29, "grad_norm": 0.6652680659069923, "learning_rate": 2.9764683101169746e-06, "loss": 0.4859, "step": 10104 }, { "epoch": 1.29, "grad_norm": 0.8077801746704958, "learning_rate": 2.9755250225756525e-06, "loss": 0.5204, "step": 10105 }, { "epoch": 1.29, "grad_norm": 0.7764389497501268, "learning_rate": 2.974581821209761e-06, "loss": 0.5202, "step": 10106 }, { "epoch": 1.29, "grad_norm": 0.7197404009164483, "learning_rate": 2.9736387060594484e-06, "loss": 0.4562, "step": 10107 }, { "epoch": 1.29, "grad_norm": 0.8357037768159049, "learning_rate": 2.9726956771648616e-06, "loss": 0.5501, "step": 10108 }, { "epoch": 1.29, "grad_norm": 0.67724049421269, "learning_rate": 2.971752734566142e-06, "loss": 0.5183, "step": 10109 }, { "epoch": 1.29, "grad_norm": 0.7044068443668652, "learning_rate": 2.9708098783034263e-06, "loss": 0.4752, "step": 10110 }, { "epoch": 1.29, "grad_norm": 0.7033922958840514, "learning_rate": 2.9698671084168497e-06, "loss": 0.5317, "step": 10111 }, { "epoch": 1.29, "grad_norm": 0.7709944479576237, "learning_rate": 2.968924424946541e-06, "loss": 0.5065, "step": 10112 }, { "epoch": 1.29, "grad_norm": 0.6444397679009273, "learning_rate": 2.967981827932631e-06, "loss": 0.4504, "step": 10113 }, { "epoch": 1.29, "grad_norm": 0.6266647373921675, "learning_rate": 2.9670393174152417e-06, "loss": 0.4897, "step": 10114 }, { "epoch": 1.29, "grad_norm": 0.7758081475425422, "learning_rate": 2.966096893434492e-06, "loss": 0.5538, "step": 10115 }, { "epoch": 1.29, "grad_norm": 0.639264055117079, "learning_rate": 2.9651545560304986e-06, "loss": 0.4487, "step": 10116 }, { "epoch": 1.29, "grad_norm": 0.7670169816847134, "learning_rate": 2.9642123052433725e-06, "loss": 0.5225, "step": 10117 }, { "epoch": 1.29, "grad_norm": 0.765958751322079, "learning_rate": 2.963270141113223e-06, "loss": 0.5261, "step": 10118 }, { "epoch": 1.29, "grad_norm": 0.7417938385491667, "learning_rate": 2.9623280636801554e-06, "loss": 0.4577, "step": 10119 }, { "epoch": 1.29, "grad_norm": 0.6330606280845634, "learning_rate": 2.9613860729842714e-06, "loss": 0.4686, "step": 10120 }, { "epoch": 1.29, "grad_norm": 0.8042711924059736, "learning_rate": 2.960444169065667e-06, "loss": 0.5317, "step": 10121 }, { "epoch": 1.29, "grad_norm": 0.6931330767737331, "learning_rate": 2.959502351964437e-06, "loss": 0.4783, "step": 10122 }, { "epoch": 1.29, "grad_norm": 0.6227064661592011, "learning_rate": 2.9585606217206714e-06, "loss": 0.4434, "step": 10123 }, { "epoch": 1.29, "grad_norm": 0.6326883520728792, "learning_rate": 2.957618978374457e-06, "loss": 0.4877, "step": 10124 }, { "epoch": 1.29, "grad_norm": 0.7003053110487208, "learning_rate": 2.9566774219658744e-06, "loss": 0.447, "step": 10125 }, { "epoch": 1.29, "grad_norm": 0.6386031879147133, "learning_rate": 2.955735952535006e-06, "loss": 0.4468, "step": 10126 }, { "epoch": 1.29, "grad_norm": 0.7669863806583794, "learning_rate": 2.9547945701219245e-06, "loss": 0.4719, "step": 10127 }, { "epoch": 1.29, "grad_norm": 0.6717987016775734, "learning_rate": 2.9538532747667036e-06, "loss": 0.4692, "step": 10128 }, { "epoch": 1.29, "grad_norm": 0.8611211247390582, "learning_rate": 2.9529120665094103e-06, "loss": 0.5389, "step": 10129 }, { "epoch": 1.29, "grad_norm": 0.5387871274834539, "learning_rate": 2.951970945390108e-06, "loss": 0.4386, "step": 10130 }, { "epoch": 1.29, "grad_norm": 0.6376877981854637, "learning_rate": 2.9510299114488592e-06, "loss": 0.4805, "step": 10131 }, { "epoch": 1.29, "grad_norm": 0.6348081336631767, "learning_rate": 2.9500889647257193e-06, "loss": 0.5089, "step": 10132 }, { "epoch": 1.29, "grad_norm": 0.645884238847916, "learning_rate": 2.9491481052607406e-06, "loss": 0.5079, "step": 10133 }, { "epoch": 1.29, "grad_norm": 0.6452682051769816, "learning_rate": 2.9482073330939743e-06, "loss": 0.4876, "step": 10134 }, { "epoch": 1.29, "grad_norm": 0.8376356155256386, "learning_rate": 2.947266648265465e-06, "loss": 0.5016, "step": 10135 }, { "epoch": 1.29, "grad_norm": 0.6292333787968463, "learning_rate": 2.9463260508152547e-06, "loss": 0.4734, "step": 10136 }, { "epoch": 1.29, "grad_norm": 0.8373799011921564, "learning_rate": 2.945385540783381e-06, "loss": 0.5214, "step": 10137 }, { "epoch": 1.29, "grad_norm": 0.6615495878018989, "learning_rate": 2.9444451182098805e-06, "loss": 0.5056, "step": 10138 }, { "epoch": 1.29, "grad_norm": 1.201925206034663, "learning_rate": 2.9435047831347824e-06, "loss": 0.5585, "step": 10139 }, { "epoch": 1.29, "grad_norm": 1.1188336960899259, "learning_rate": 2.9425645355981143e-06, "loss": 0.5157, "step": 10140 }, { "epoch": 1.29, "grad_norm": 0.7477213854610992, "learning_rate": 2.9416243756398986e-06, "loss": 0.4936, "step": 10141 }, { "epoch": 1.29, "grad_norm": 0.7815082644484085, "learning_rate": 2.9406843033001563e-06, "loss": 0.4832, "step": 10142 }, { "epoch": 1.29, "grad_norm": 0.8209542514004152, "learning_rate": 2.9397443186189024e-06, "loss": 0.5489, "step": 10143 }, { "epoch": 1.29, "grad_norm": 0.6637044558774606, "learning_rate": 2.9388044216361492e-06, "loss": 0.4937, "step": 10144 }, { "epoch": 1.29, "grad_norm": 0.5880107320278892, "learning_rate": 2.9378646123919054e-06, "loss": 0.4471, "step": 10145 }, { "epoch": 1.29, "grad_norm": 0.8429328183645565, "learning_rate": 2.936924890926175e-06, "loss": 0.5619, "step": 10146 }, { "epoch": 1.29, "grad_norm": 0.7516802532245696, "learning_rate": 2.9359852572789594e-06, "loss": 0.5099, "step": 10147 }, { "epoch": 1.29, "grad_norm": 0.6508521883211092, "learning_rate": 2.9350457114902565e-06, "loss": 0.479, "step": 10148 }, { "epoch": 1.29, "grad_norm": 0.6544020286912982, "learning_rate": 2.934106253600058e-06, "loss": 0.4783, "step": 10149 }, { "epoch": 1.29, "grad_norm": 0.6214310053089499, "learning_rate": 2.933166883648353e-06, "loss": 0.5004, "step": 10150 }, { "epoch": 1.29, "grad_norm": 1.2185275914777103, "learning_rate": 2.932227601675131e-06, "loss": 0.5462, "step": 10151 }, { "epoch": 1.29, "grad_norm": 0.7644958094851502, "learning_rate": 2.9312884077203725e-06, "loss": 0.4934, "step": 10152 }, { "epoch": 1.29, "grad_norm": 0.5779097297825884, "learning_rate": 2.930349301824056e-06, "loss": 0.4473, "step": 10153 }, { "epoch": 1.29, "grad_norm": 0.6559143254130309, "learning_rate": 2.929410284026156e-06, "loss": 0.5137, "step": 10154 }, { "epoch": 1.29, "grad_norm": 0.725209318894737, "learning_rate": 2.9284713543666434e-06, "loss": 0.528, "step": 10155 }, { "epoch": 1.29, "grad_norm": 0.7034797640431257, "learning_rate": 2.9275325128854854e-06, "loss": 0.5068, "step": 10156 }, { "epoch": 1.29, "grad_norm": 0.5858274836647018, "learning_rate": 2.926593759622646e-06, "loss": 0.4194, "step": 10157 }, { "epoch": 1.29, "grad_norm": 0.6500672658007484, "learning_rate": 2.925655094618084e-06, "loss": 0.4793, "step": 10158 }, { "epoch": 1.29, "grad_norm": 0.7217885520479481, "learning_rate": 2.9247165179117567e-06, "loss": 0.4786, "step": 10159 }, { "epoch": 1.29, "grad_norm": 0.634553578264273, "learning_rate": 2.9237780295436153e-06, "loss": 0.4835, "step": 10160 }, { "epoch": 1.29, "grad_norm": 0.8408847210221582, "learning_rate": 2.9228396295536083e-06, "loss": 0.5354, "step": 10161 }, { "epoch": 1.29, "grad_norm": 0.7644156960060315, "learning_rate": 2.921901317981679e-06, "loss": 0.5325, "step": 10162 }, { "epoch": 1.29, "grad_norm": 0.8285579352659967, "learning_rate": 2.9209630948677715e-06, "loss": 0.5264, "step": 10163 }, { "epoch": 1.29, "grad_norm": 0.7487038187038251, "learning_rate": 2.9200249602518215e-06, "loss": 0.5284, "step": 10164 }, { "epoch": 1.29, "grad_norm": 0.6425912550011786, "learning_rate": 2.9190869141737628e-06, "loss": 0.5401, "step": 10165 }, { "epoch": 1.3, "grad_norm": 0.7359199837551312, "learning_rate": 2.9181489566735256e-06, "loss": 0.5379, "step": 10166 }, { "epoch": 1.3, "grad_norm": 0.8129476332300664, "learning_rate": 2.917211087791032e-06, "loss": 0.5472, "step": 10167 }, { "epoch": 1.3, "grad_norm": 0.6923659456726867, "learning_rate": 2.9162733075662087e-06, "loss": 0.4581, "step": 10168 }, { "epoch": 1.3, "grad_norm": 0.7804021163281288, "learning_rate": 2.91533561603897e-06, "loss": 0.5254, "step": 10169 }, { "epoch": 1.3, "grad_norm": 0.8436480174237487, "learning_rate": 2.9143980132492334e-06, "loss": 0.5892, "step": 10170 }, { "epoch": 1.3, "grad_norm": 0.8545459823577821, "learning_rate": 2.9134604992369097e-06, "loss": 0.536, "step": 10171 }, { "epoch": 1.3, "grad_norm": 0.654194378378166, "learning_rate": 2.9125230740419037e-06, "loss": 0.4624, "step": 10172 }, { "epoch": 1.3, "grad_norm": 0.6705653229842632, "learning_rate": 2.911585737704121e-06, "loss": 0.4559, "step": 10173 }, { "epoch": 1.3, "grad_norm": 0.6687289069006598, "learning_rate": 2.910648490263459e-06, "loss": 0.4759, "step": 10174 }, { "epoch": 1.3, "grad_norm": 0.6442381443013449, "learning_rate": 2.9097113317598135e-06, "loss": 0.4706, "step": 10175 }, { "epoch": 1.3, "grad_norm": 0.6667896788204748, "learning_rate": 2.908774262233079e-06, "loss": 0.4529, "step": 10176 }, { "epoch": 1.3, "grad_norm": 0.5904164099469337, "learning_rate": 2.907837281723139e-06, "loss": 0.4583, "step": 10177 }, { "epoch": 1.3, "grad_norm": 0.55932948275627, "learning_rate": 2.9069003902698833e-06, "loss": 0.4731, "step": 10178 }, { "epoch": 1.3, "grad_norm": 0.8565273952319515, "learning_rate": 2.905963587913187e-06, "loss": 0.4672, "step": 10179 }, { "epoch": 1.3, "grad_norm": 0.7750285684129028, "learning_rate": 2.905026874692931e-06, "loss": 0.5748, "step": 10180 }, { "epoch": 1.3, "grad_norm": 0.7166605566136042, "learning_rate": 2.904090250648985e-06, "loss": 0.5358, "step": 10181 }, { "epoch": 1.3, "grad_norm": 0.6339450470409902, "learning_rate": 2.903153715821221e-06, "loss": 0.4702, "step": 10182 }, { "epoch": 1.3, "grad_norm": 0.9731304868139452, "learning_rate": 2.9022172702495004e-06, "loss": 0.5219, "step": 10183 }, { "epoch": 1.3, "grad_norm": 0.7514882638573377, "learning_rate": 2.9012809139736895e-06, "loss": 0.4829, "step": 10184 }, { "epoch": 1.3, "grad_norm": 0.7475335600527702, "learning_rate": 2.9003446470336415e-06, "loss": 0.5333, "step": 10185 }, { "epoch": 1.3, "grad_norm": 0.6985193720968823, "learning_rate": 2.899408469469214e-06, "loss": 0.5511, "step": 10186 }, { "epoch": 1.3, "grad_norm": 0.8727280185459015, "learning_rate": 2.898472381320252e-06, "loss": 0.5722, "step": 10187 }, { "epoch": 1.3, "grad_norm": 0.7734805303035921, "learning_rate": 2.897536382626609e-06, "loss": 0.4853, "step": 10188 }, { "epoch": 1.3, "grad_norm": 0.6444911244666087, "learning_rate": 2.8966004734281216e-06, "loss": 0.4859, "step": 10189 }, { "epoch": 1.3, "grad_norm": 0.7303950249005075, "learning_rate": 2.895664653764633e-06, "loss": 0.5195, "step": 10190 }, { "epoch": 1.3, "grad_norm": 0.6402140605767778, "learning_rate": 2.8947289236759725e-06, "loss": 0.4515, "step": 10191 }, { "epoch": 1.3, "grad_norm": 0.6763932478440389, "learning_rate": 2.8937932832019767e-06, "loss": 0.4783, "step": 10192 }, { "epoch": 1.3, "grad_norm": 0.6579200449093628, "learning_rate": 2.8928577323824687e-06, "loss": 0.4732, "step": 10193 }, { "epoch": 1.3, "grad_norm": 0.7027925633443014, "learning_rate": 2.8919222712572755e-06, "loss": 0.4976, "step": 10194 }, { "epoch": 1.3, "grad_norm": 0.5671219139174667, "learning_rate": 2.8909868998662127e-06, "loss": 0.4658, "step": 10195 }, { "epoch": 1.3, "grad_norm": 0.9705835662620246, "learning_rate": 2.8900516182491002e-06, "loss": 0.5174, "step": 10196 }, { "epoch": 1.3, "grad_norm": 0.7697562743226838, "learning_rate": 2.8891164264457457e-06, "loss": 0.4861, "step": 10197 }, { "epoch": 1.3, "grad_norm": 0.6050093154754461, "learning_rate": 2.8881813244959612e-06, "loss": 0.5307, "step": 10198 }, { "epoch": 1.3, "grad_norm": 0.8398249656435676, "learning_rate": 2.8872463124395457e-06, "loss": 0.5081, "step": 10199 }, { "epoch": 1.3, "grad_norm": 0.5772250485459917, "learning_rate": 2.8863113903163078e-06, "loss": 0.4704, "step": 10200 }, { "epoch": 1.3, "grad_norm": 0.679098520169189, "learning_rate": 2.8853765581660365e-06, "loss": 0.5039, "step": 10201 }, { "epoch": 1.3, "grad_norm": 0.6360127132472283, "learning_rate": 2.8844418160285305e-06, "loss": 0.4763, "step": 10202 }, { "epoch": 1.3, "grad_norm": 0.7243562471193774, "learning_rate": 2.883507163943574e-06, "loss": 0.5064, "step": 10203 }, { "epoch": 1.3, "grad_norm": 0.7163903348118532, "learning_rate": 2.882572601950956e-06, "loss": 0.4583, "step": 10204 }, { "epoch": 1.3, "grad_norm": 0.5711941198400342, "learning_rate": 2.881638130090454e-06, "loss": 0.4582, "step": 10205 }, { "epoch": 1.3, "grad_norm": 0.6274230845694725, "learning_rate": 2.8807037484018496e-06, "loss": 0.4164, "step": 10206 }, { "epoch": 1.3, "grad_norm": 0.6260335194664035, "learning_rate": 2.8797694569249122e-06, "loss": 0.5005, "step": 10207 }, { "epoch": 1.3, "grad_norm": 0.7128907202421917, "learning_rate": 2.878835255699416e-06, "loss": 0.533, "step": 10208 }, { "epoch": 1.3, "grad_norm": 0.7845323777474015, "learning_rate": 2.8779011447651227e-06, "loss": 0.5281, "step": 10209 }, { "epoch": 1.3, "grad_norm": 0.747650927201233, "learning_rate": 2.876967124161798e-06, "loss": 0.5459, "step": 10210 }, { "epoch": 1.3, "grad_norm": 0.7193081240913091, "learning_rate": 2.876033193929197e-06, "loss": 0.5552, "step": 10211 }, { "epoch": 1.3, "grad_norm": 0.7914127652799089, "learning_rate": 2.8750993541070753e-06, "loss": 0.5435, "step": 10212 }, { "epoch": 1.3, "grad_norm": 0.7084305546304169, "learning_rate": 2.874165604735184e-06, "loss": 0.5492, "step": 10213 }, { "epoch": 1.3, "grad_norm": 0.5880303783712654, "learning_rate": 2.8732319458532722e-06, "loss": 0.4228, "step": 10214 }, { "epoch": 1.3, "grad_norm": 0.6229318713636494, "learning_rate": 2.8722983775010775e-06, "loss": 0.4712, "step": 10215 }, { "epoch": 1.3, "grad_norm": 0.6756154003694903, "learning_rate": 2.871364899718344e-06, "loss": 0.503, "step": 10216 }, { "epoch": 1.3, "grad_norm": 0.7218600006010616, "learning_rate": 2.8704315125448027e-06, "loss": 0.5003, "step": 10217 }, { "epoch": 1.3, "grad_norm": 0.6322725850395896, "learning_rate": 2.869498216020189e-06, "loss": 0.4387, "step": 10218 }, { "epoch": 1.3, "grad_norm": 0.6896654022079284, "learning_rate": 2.8685650101842257e-06, "loss": 0.4967, "step": 10219 }, { "epoch": 1.3, "grad_norm": 0.7053372394926651, "learning_rate": 2.8676318950766413e-06, "loss": 0.4903, "step": 10220 }, { "epoch": 1.3, "grad_norm": 0.6199428687595725, "learning_rate": 2.8666988707371507e-06, "loss": 0.4807, "step": 10221 }, { "epoch": 1.3, "grad_norm": 0.6769955706031948, "learning_rate": 2.8657659372054743e-06, "loss": 0.4677, "step": 10222 }, { "epoch": 1.3, "grad_norm": 0.675305212036312, "learning_rate": 2.86483309452132e-06, "loss": 0.5068, "step": 10223 }, { "epoch": 1.3, "grad_norm": 0.8707066448333448, "learning_rate": 2.8639003427243974e-06, "loss": 0.4643, "step": 10224 }, { "epoch": 1.3, "grad_norm": 0.6364961778623395, "learning_rate": 2.8629676818544135e-06, "loss": 0.4276, "step": 10225 }, { "epoch": 1.3, "grad_norm": 0.6801140971692162, "learning_rate": 2.862035111951064e-06, "loss": 0.4505, "step": 10226 }, { "epoch": 1.3, "grad_norm": 0.6604427994928885, "learning_rate": 2.86110263305405e-06, "loss": 0.4693, "step": 10227 }, { "epoch": 1.3, "grad_norm": 0.7986591803117429, "learning_rate": 2.860170245203061e-06, "loss": 0.4842, "step": 10228 }, { "epoch": 1.3, "grad_norm": 0.5920876835762492, "learning_rate": 2.8592379484377874e-06, "loss": 0.4548, "step": 10229 }, { "epoch": 1.3, "grad_norm": 0.5540399558151131, "learning_rate": 2.8583057427979123e-06, "loss": 0.4252, "step": 10230 }, { "epoch": 1.3, "grad_norm": 0.6078661459183704, "learning_rate": 2.8573736283231172e-06, "loss": 0.4432, "step": 10231 }, { "epoch": 1.3, "grad_norm": 0.6682072685001961, "learning_rate": 2.8564416050530826e-06, "loss": 0.4582, "step": 10232 }, { "epoch": 1.3, "grad_norm": 0.5689592425546806, "learning_rate": 2.8555096730274768e-06, "loss": 0.4613, "step": 10233 }, { "epoch": 1.3, "grad_norm": 0.7428411694563717, "learning_rate": 2.854577832285973e-06, "loss": 0.4917, "step": 10234 }, { "epoch": 1.3, "grad_norm": 0.7442687694616843, "learning_rate": 2.8536460828682335e-06, "loss": 0.5116, "step": 10235 }, { "epoch": 1.3, "grad_norm": 0.848214475523199, "learning_rate": 2.852714424813921e-06, "loss": 0.5252, "step": 10236 }, { "epoch": 1.3, "grad_norm": 0.6479163829099377, "learning_rate": 2.851782858162696e-06, "loss": 0.5247, "step": 10237 }, { "epoch": 1.3, "grad_norm": 0.7847061284805814, "learning_rate": 2.8508513829542086e-06, "loss": 0.5009, "step": 10238 }, { "epoch": 1.3, "grad_norm": 0.832324596194084, "learning_rate": 2.8499199992281113e-06, "loss": 0.5098, "step": 10239 }, { "epoch": 1.3, "grad_norm": 0.5739694357939171, "learning_rate": 2.848988707024047e-06, "loss": 0.4637, "step": 10240 }, { "epoch": 1.3, "grad_norm": 1.022437639239965, "learning_rate": 2.8480575063816617e-06, "loss": 0.5215, "step": 10241 }, { "epoch": 1.3, "grad_norm": 0.6254561500669832, "learning_rate": 2.8471263973405904e-06, "loss": 0.4251, "step": 10242 }, { "epoch": 1.3, "grad_norm": 0.8074913719827148, "learning_rate": 2.8461953799404702e-06, "loss": 0.4465, "step": 10243 }, { "epoch": 1.31, "grad_norm": 0.7669159011448926, "learning_rate": 2.8452644542209283e-06, "loss": 0.4949, "step": 10244 }, { "epoch": 1.31, "grad_norm": 0.6114192213671742, "learning_rate": 2.8443336202215953e-06, "loss": 0.5023, "step": 10245 }, { "epoch": 1.31, "grad_norm": 0.7261979701571393, "learning_rate": 2.843402877982089e-06, "loss": 0.5405, "step": 10246 }, { "epoch": 1.31, "grad_norm": 0.7405986778326757, "learning_rate": 2.842472227542033e-06, "loss": 0.5474, "step": 10247 }, { "epoch": 1.31, "grad_norm": 0.7283534771526289, "learning_rate": 2.841541668941038e-06, "loss": 0.551, "step": 10248 }, { "epoch": 1.31, "grad_norm": 0.606013040166489, "learning_rate": 2.840611202218716e-06, "loss": 0.4986, "step": 10249 }, { "epoch": 1.31, "grad_norm": 0.6612878555801732, "learning_rate": 2.8396808274146757e-06, "loss": 0.5319, "step": 10250 }, { "epoch": 1.31, "grad_norm": 0.8857413887674892, "learning_rate": 2.8387505445685205e-06, "loss": 0.5321, "step": 10251 }, { "epoch": 1.31, "grad_norm": 0.5987038658197168, "learning_rate": 2.837820353719847e-06, "loss": 0.452, "step": 10252 }, { "epoch": 1.31, "grad_norm": 0.6192824010431363, "learning_rate": 2.836890254908252e-06, "loss": 0.5001, "step": 10253 }, { "epoch": 1.31, "grad_norm": 0.7636759672701975, "learning_rate": 2.835960248173326e-06, "loss": 0.5391, "step": 10254 }, { "epoch": 1.31, "grad_norm": 0.6316826054431608, "learning_rate": 2.835030333554659e-06, "loss": 0.4614, "step": 10255 }, { "epoch": 1.31, "grad_norm": 0.6172647901463921, "learning_rate": 2.8341005110918298e-06, "loss": 0.4436, "step": 10256 }, { "epoch": 1.31, "grad_norm": 0.6877274755177915, "learning_rate": 2.833170780824423e-06, "loss": 0.498, "step": 10257 }, { "epoch": 1.31, "grad_norm": 0.5982828188157019, "learning_rate": 2.83224114279201e-06, "loss": 0.4566, "step": 10258 }, { "epoch": 1.31, "grad_norm": 0.792733103944545, "learning_rate": 2.831311597034166e-06, "loss": 0.4908, "step": 10259 }, { "epoch": 1.31, "grad_norm": 0.5374601529386345, "learning_rate": 2.8303821435904554e-06, "loss": 0.4447, "step": 10260 }, { "epoch": 1.31, "grad_norm": 0.627016646477181, "learning_rate": 2.829452782500444e-06, "loss": 0.4607, "step": 10261 }, { "epoch": 1.31, "grad_norm": 0.7128397596507715, "learning_rate": 2.828523513803691e-06, "loss": 0.5326, "step": 10262 }, { "epoch": 1.31, "grad_norm": 0.6797515649418353, "learning_rate": 2.827594337539755e-06, "loss": 0.4838, "step": 10263 }, { "epoch": 1.31, "grad_norm": 0.6103839008157308, "learning_rate": 2.8266652537481842e-06, "loss": 0.4159, "step": 10264 }, { "epoch": 1.31, "grad_norm": 0.6107744884089784, "learning_rate": 2.8257362624685316e-06, "loss": 0.4653, "step": 10265 }, { "epoch": 1.31, "grad_norm": 0.6060263568691121, "learning_rate": 2.824807363740335e-06, "loss": 0.4345, "step": 10266 }, { "epoch": 1.31, "grad_norm": 0.6151861276284115, "learning_rate": 2.82387855760314e-06, "loss": 0.4539, "step": 10267 }, { "epoch": 1.31, "grad_norm": 0.719312302955807, "learning_rate": 2.8229498440964797e-06, "loss": 0.5098, "step": 10268 }, { "epoch": 1.31, "grad_norm": 0.7787548917253838, "learning_rate": 2.82202122325989e-06, "loss": 0.4903, "step": 10269 }, { "epoch": 1.31, "grad_norm": 0.6801256709394332, "learning_rate": 2.821092695132894e-06, "loss": 0.4544, "step": 10270 }, { "epoch": 1.31, "grad_norm": 0.8934962559810521, "learning_rate": 2.8201642597550216e-06, "loss": 0.4587, "step": 10271 }, { "epoch": 1.31, "grad_norm": 0.5458415199796832, "learning_rate": 2.8192359171657883e-06, "loss": 0.4295, "step": 10272 }, { "epoch": 1.31, "grad_norm": 0.5869432881126402, "learning_rate": 2.818307667404716e-06, "loss": 0.4087, "step": 10273 }, { "epoch": 1.31, "grad_norm": 0.8465861078488011, "learning_rate": 2.8173795105113105e-06, "loss": 0.4923, "step": 10274 }, { "epoch": 1.31, "grad_norm": 0.6617769884192014, "learning_rate": 2.8164514465250887e-06, "loss": 0.4302, "step": 10275 }, { "epoch": 1.31, "grad_norm": 0.7092001774204404, "learning_rate": 2.8155234754855486e-06, "loss": 0.5092, "step": 10276 }, { "epoch": 1.31, "grad_norm": 0.7022471518079075, "learning_rate": 2.8145955974321957e-06, "loss": 0.5295, "step": 10277 }, { "epoch": 1.31, "grad_norm": 0.8104323622871926, "learning_rate": 2.8136678124045234e-06, "loss": 0.4892, "step": 10278 }, { "epoch": 1.31, "grad_norm": 0.6373869737959377, "learning_rate": 2.8127401204420276e-06, "loss": 0.4946, "step": 10279 }, { "epoch": 1.31, "grad_norm": 0.7387560399374357, "learning_rate": 2.811812521584193e-06, "loss": 0.4795, "step": 10280 }, { "epoch": 1.31, "grad_norm": 0.5850546118255726, "learning_rate": 2.8108850158705093e-06, "loss": 0.5128, "step": 10281 }, { "epoch": 1.31, "grad_norm": 0.6728292164626909, "learning_rate": 2.8099576033404524e-06, "loss": 0.4763, "step": 10282 }, { "epoch": 1.31, "grad_norm": 0.684555096851381, "learning_rate": 2.8090302840335043e-06, "loss": 0.4436, "step": 10283 }, { "epoch": 1.31, "grad_norm": 0.6571009118436286, "learning_rate": 2.808103057989134e-06, "loss": 0.4849, "step": 10284 }, { "epoch": 1.31, "grad_norm": 0.7087608217287694, "learning_rate": 2.8071759252468138e-06, "loss": 0.5212, "step": 10285 }, { "epoch": 1.31, "grad_norm": 0.8403310059335515, "learning_rate": 2.8062488858460056e-06, "loss": 0.5864, "step": 10286 }, { "epoch": 1.31, "grad_norm": 0.7314393986426435, "learning_rate": 2.8053219398261715e-06, "loss": 0.4602, "step": 10287 }, { "epoch": 1.31, "grad_norm": 0.7083839069407044, "learning_rate": 2.8043950872267717e-06, "loss": 0.4476, "step": 10288 }, { "epoch": 1.31, "grad_norm": 0.6700185312252959, "learning_rate": 2.8034683280872544e-06, "loss": 0.4606, "step": 10289 }, { "epoch": 1.31, "grad_norm": 0.6161550710054887, "learning_rate": 2.8025416624470737e-06, "loss": 0.4463, "step": 10290 }, { "epoch": 1.31, "grad_norm": 0.5871924184064675, "learning_rate": 2.8016150903456706e-06, "loss": 0.4657, "step": 10291 }, { "epoch": 1.31, "grad_norm": 0.7160656083847337, "learning_rate": 2.8006886118224898e-06, "loss": 0.5152, "step": 10292 }, { "epoch": 1.31, "grad_norm": 0.8708033483477031, "learning_rate": 2.7997622269169643e-06, "loss": 0.5324, "step": 10293 }, { "epoch": 1.31, "grad_norm": 0.8495864015237135, "learning_rate": 2.7988359356685307e-06, "loss": 0.5304, "step": 10294 }, { "epoch": 1.31, "grad_norm": 0.8758556575715707, "learning_rate": 2.7979097381166185e-06, "loss": 0.483, "step": 10295 }, { "epoch": 1.31, "grad_norm": 0.6023690437847421, "learning_rate": 2.7969836343006507e-06, "loss": 0.4535, "step": 10296 }, { "epoch": 1.31, "grad_norm": 0.7341064892883139, "learning_rate": 2.796057624260051e-06, "loss": 0.5287, "step": 10297 }, { "epoch": 1.31, "grad_norm": 0.6943803505952622, "learning_rate": 2.7951317080342345e-06, "loss": 0.4895, "step": 10298 }, { "epoch": 1.31, "grad_norm": 0.5712222322452283, "learning_rate": 2.7942058856626147e-06, "loss": 0.3958, "step": 10299 }, { "epoch": 1.31, "grad_norm": 0.680760703570845, "learning_rate": 2.7932801571846035e-06, "loss": 0.5178, "step": 10300 }, { "epoch": 1.31, "grad_norm": 0.7785017709815346, "learning_rate": 2.792354522639603e-06, "loss": 0.5255, "step": 10301 }, { "epoch": 1.31, "grad_norm": 0.8857959021537309, "learning_rate": 2.791428982067017e-06, "loss": 0.4876, "step": 10302 }, { "epoch": 1.31, "grad_norm": 0.7026159846106256, "learning_rate": 2.7905035355062403e-06, "loss": 0.5265, "step": 10303 }, { "epoch": 1.31, "grad_norm": 0.6851347398625993, "learning_rate": 2.7895781829966696e-06, "loss": 0.5018, "step": 10304 }, { "epoch": 1.31, "grad_norm": 0.5777702955082746, "learning_rate": 2.7886529245776896e-06, "loss": 0.4829, "step": 10305 }, { "epoch": 1.31, "grad_norm": 0.6512647504553011, "learning_rate": 2.7877277602886908e-06, "loss": 0.5077, "step": 10306 }, { "epoch": 1.31, "grad_norm": 0.8217845487809845, "learning_rate": 2.786802690169049e-06, "loss": 0.5281, "step": 10307 }, { "epoch": 1.31, "grad_norm": 0.6647988542616354, "learning_rate": 2.7858777142581474e-06, "loss": 0.5418, "step": 10308 }, { "epoch": 1.31, "grad_norm": 0.8704886978408509, "learning_rate": 2.7849528325953532e-06, "loss": 0.5389, "step": 10309 }, { "epoch": 1.31, "grad_norm": 0.7667554399995513, "learning_rate": 2.784028045220041e-06, "loss": 0.5095, "step": 10310 }, { "epoch": 1.31, "grad_norm": 0.5799480689458069, "learning_rate": 2.78310335217157e-06, "loss": 0.4272, "step": 10311 }, { "epoch": 1.31, "grad_norm": 0.6003253809909085, "learning_rate": 2.7821787534893086e-06, "loss": 0.4406, "step": 10312 }, { "epoch": 1.31, "grad_norm": 0.6632551367750247, "learning_rate": 2.781254249212609e-06, "loss": 0.4135, "step": 10313 }, { "epoch": 1.31, "grad_norm": 0.6204520004420718, "learning_rate": 2.7803298393808275e-06, "loss": 0.5279, "step": 10314 }, { "epoch": 1.31, "grad_norm": 0.718102852184217, "learning_rate": 2.7794055240333097e-06, "loss": 0.5138, "step": 10315 }, { "epoch": 1.31, "grad_norm": 0.7621876313546597, "learning_rate": 2.778481303209405e-06, "loss": 0.4995, "step": 10316 }, { "epoch": 1.31, "grad_norm": 0.6308741026634868, "learning_rate": 2.77755717694845e-06, "loss": 0.477, "step": 10317 }, { "epoch": 1.31, "grad_norm": 0.5793751279011736, "learning_rate": 2.7766331452897866e-06, "loss": 0.4422, "step": 10318 }, { "epoch": 1.31, "grad_norm": 0.7060617440529559, "learning_rate": 2.775709208272742e-06, "loss": 0.4876, "step": 10319 }, { "epoch": 1.31, "grad_norm": 0.9337510585974735, "learning_rate": 2.774785365936652e-06, "loss": 0.5147, "step": 10320 }, { "epoch": 1.31, "grad_norm": 0.5594443465244961, "learning_rate": 2.7738616183208355e-06, "loss": 0.4208, "step": 10321 }, { "epoch": 1.31, "grad_norm": 0.5780470233219226, "learning_rate": 2.772937965464619e-06, "loss": 0.4714, "step": 10322 }, { "epoch": 1.32, "grad_norm": 0.5957072811076571, "learning_rate": 2.772014407407312e-06, "loss": 0.447, "step": 10323 }, { "epoch": 1.32, "grad_norm": 0.6632208242494634, "learning_rate": 2.771090944188236e-06, "loss": 0.5158, "step": 10324 }, { "epoch": 1.32, "grad_norm": 0.7936907643249319, "learning_rate": 2.770167575846694e-06, "loss": 0.5607, "step": 10325 }, { "epoch": 1.32, "grad_norm": 2.0528812035680306, "learning_rate": 2.769244302421995e-06, "loss": 0.528, "step": 10326 }, { "epoch": 1.32, "grad_norm": 0.5810315354662225, "learning_rate": 2.7683211239534346e-06, "loss": 0.4219, "step": 10327 }, { "epoch": 1.32, "grad_norm": 0.6969253167692392, "learning_rate": 2.7673980404803156e-06, "loss": 0.5126, "step": 10328 }, { "epoch": 1.32, "grad_norm": 0.7726218100733003, "learning_rate": 2.766475052041926e-06, "loss": 0.5371, "step": 10329 }, { "epoch": 1.32, "grad_norm": 0.7099240571205144, "learning_rate": 2.765552158677557e-06, "loss": 0.4905, "step": 10330 }, { "epoch": 1.32, "grad_norm": 0.619004841127661, "learning_rate": 2.7646293604264908e-06, "loss": 0.4465, "step": 10331 }, { "epoch": 1.32, "grad_norm": 0.6415019938912052, "learning_rate": 2.763706657328011e-06, "loss": 0.5054, "step": 10332 }, { "epoch": 1.32, "grad_norm": 0.6756060090921461, "learning_rate": 2.7627840494213914e-06, "loss": 0.4842, "step": 10333 }, { "epoch": 1.32, "grad_norm": 0.6233721176824821, "learning_rate": 2.761861536745908e-06, "loss": 0.5191, "step": 10334 }, { "epoch": 1.32, "grad_norm": 0.6540391906862236, "learning_rate": 2.7609391193408243e-06, "loss": 0.4916, "step": 10335 }, { "epoch": 1.32, "grad_norm": 0.5785833943689869, "learning_rate": 2.760016797245407e-06, "loss": 0.4602, "step": 10336 }, { "epoch": 1.32, "grad_norm": 0.6214001281557133, "learning_rate": 2.7590945704989168e-06, "loss": 0.4682, "step": 10337 }, { "epoch": 1.32, "grad_norm": 0.7143726147005993, "learning_rate": 2.758172439140612e-06, "loss": 0.515, "step": 10338 }, { "epoch": 1.32, "grad_norm": 0.6172944417762497, "learning_rate": 2.7572504032097406e-06, "loss": 0.4129, "step": 10339 }, { "epoch": 1.32, "grad_norm": 0.6894994342783959, "learning_rate": 2.7563284627455545e-06, "loss": 0.4387, "step": 10340 }, { "epoch": 1.32, "grad_norm": 0.6526816237412422, "learning_rate": 2.7554066177872948e-06, "loss": 0.4414, "step": 10341 }, { "epoch": 1.32, "grad_norm": 0.6287719420341326, "learning_rate": 2.754484868374204e-06, "loss": 0.4557, "step": 10342 }, { "epoch": 1.32, "grad_norm": 0.8484841580775698, "learning_rate": 2.753563214545515e-06, "loss": 0.4992, "step": 10343 }, { "epoch": 1.32, "grad_norm": 0.6082974126964179, "learning_rate": 2.752641656340463e-06, "loss": 0.4208, "step": 10344 }, { "epoch": 1.32, "grad_norm": 0.594648707224296, "learning_rate": 2.7517201937982724e-06, "loss": 0.4778, "step": 10345 }, { "epoch": 1.32, "grad_norm": 0.7926787184939269, "learning_rate": 2.750798826958171e-06, "loss": 0.4674, "step": 10346 }, { "epoch": 1.32, "grad_norm": 0.6785821081690722, "learning_rate": 2.749877555859373e-06, "loss": 0.4938, "step": 10347 }, { "epoch": 1.32, "grad_norm": 0.720702003143994, "learning_rate": 2.748956380541098e-06, "loss": 0.5642, "step": 10348 }, { "epoch": 1.32, "grad_norm": 0.7127434240465371, "learning_rate": 2.7480353010425586e-06, "loss": 0.5575, "step": 10349 }, { "epoch": 1.32, "grad_norm": 0.8407082801634858, "learning_rate": 2.747114317402958e-06, "loss": 0.4769, "step": 10350 }, { "epoch": 1.32, "grad_norm": 0.7015039866122491, "learning_rate": 2.7461934296615034e-06, "loss": 0.4897, "step": 10351 }, { "epoch": 1.32, "grad_norm": 0.7008876470538534, "learning_rate": 2.7452726378573913e-06, "loss": 0.4816, "step": 10352 }, { "epoch": 1.32, "grad_norm": 0.6092498228414746, "learning_rate": 2.7443519420298192e-06, "loss": 0.4398, "step": 10353 }, { "epoch": 1.32, "grad_norm": 0.6386698963862778, "learning_rate": 2.743431342217975e-06, "loss": 0.4287, "step": 10354 }, { "epoch": 1.32, "grad_norm": 0.6112131601942666, "learning_rate": 2.742510838461048e-06, "loss": 0.4682, "step": 10355 }, { "epoch": 1.32, "grad_norm": 0.6570679633884028, "learning_rate": 2.741590430798222e-06, "loss": 0.4796, "step": 10356 }, { "epoch": 1.32, "grad_norm": 0.569073328216951, "learning_rate": 2.7406701192686725e-06, "loss": 0.4368, "step": 10357 }, { "epoch": 1.32, "grad_norm": 0.5715520359315882, "learning_rate": 2.7397499039115786e-06, "loss": 0.4126, "step": 10358 }, { "epoch": 1.32, "grad_norm": 0.7368551643100076, "learning_rate": 2.7388297847661066e-06, "loss": 0.5256, "step": 10359 }, { "epoch": 1.32, "grad_norm": 0.7238767532920479, "learning_rate": 2.737909761871425e-06, "loss": 0.5203, "step": 10360 }, { "epoch": 1.32, "grad_norm": 0.5591126185576063, "learning_rate": 2.736989835266698e-06, "loss": 0.4331, "step": 10361 }, { "epoch": 1.32, "grad_norm": 0.879536262743953, "learning_rate": 2.7360700049910806e-06, "loss": 0.5727, "step": 10362 }, { "epoch": 1.32, "grad_norm": 0.6834681890321648, "learning_rate": 2.7351502710837303e-06, "loss": 0.4925, "step": 10363 }, { "epoch": 1.32, "grad_norm": 0.8807061460864501, "learning_rate": 2.734230633583793e-06, "loss": 0.5137, "step": 10364 }, { "epoch": 1.32, "grad_norm": 0.7100398944654575, "learning_rate": 2.7333110925304207e-06, "loss": 0.5044, "step": 10365 }, { "epoch": 1.32, "grad_norm": 0.8074920784042978, "learning_rate": 2.732391647962749e-06, "loss": 0.548, "step": 10366 }, { "epoch": 1.32, "grad_norm": 0.772978060469139, "learning_rate": 2.7314722999199206e-06, "loss": 0.51, "step": 10367 }, { "epoch": 1.32, "grad_norm": 0.6521215880523424, "learning_rate": 2.7305530484410646e-06, "loss": 0.5046, "step": 10368 }, { "epoch": 1.32, "grad_norm": 0.6954414989450015, "learning_rate": 2.7296338935653167e-06, "loss": 0.4769, "step": 10369 }, { "epoch": 1.32, "grad_norm": 0.8740772687049388, "learning_rate": 2.728714835331796e-06, "loss": 0.5171, "step": 10370 }, { "epoch": 1.32, "grad_norm": 0.7581098288126822, "learning_rate": 2.7277958737796283e-06, "loss": 0.5144, "step": 10371 }, { "epoch": 1.32, "grad_norm": 0.8589557568648777, "learning_rate": 2.726877008947928e-06, "loss": 0.5091, "step": 10372 }, { "epoch": 1.32, "grad_norm": 0.6110068588269921, "learning_rate": 2.7259582408758096e-06, "loss": 0.4446, "step": 10373 }, { "epoch": 1.32, "grad_norm": 0.7147663911311242, "learning_rate": 2.725039569602382e-06, "loss": 0.4835, "step": 10374 }, { "epoch": 1.32, "grad_norm": 0.7413469584473779, "learning_rate": 2.724120995166752e-06, "loss": 0.544, "step": 10375 }, { "epoch": 1.32, "grad_norm": 0.8872715134898821, "learning_rate": 2.723202517608017e-06, "loss": 0.5739, "step": 10376 }, { "epoch": 1.32, "grad_norm": 0.6907243464323122, "learning_rate": 2.7222841369652764e-06, "loss": 0.4808, "step": 10377 }, { "epoch": 1.32, "grad_norm": 0.5595479299385279, "learning_rate": 2.7213658532776197e-06, "loss": 0.4381, "step": 10378 }, { "epoch": 1.32, "grad_norm": 0.6354036481681429, "learning_rate": 2.7204476665841395e-06, "loss": 0.4569, "step": 10379 }, { "epoch": 1.32, "grad_norm": 0.6061294230134897, "learning_rate": 2.719529576923916e-06, "loss": 0.4636, "step": 10380 }, { "epoch": 1.32, "grad_norm": 0.6723032050636789, "learning_rate": 2.7186115843360326e-06, "loss": 0.4954, "step": 10381 }, { "epoch": 1.32, "grad_norm": 0.6208787356448009, "learning_rate": 2.7176936888595625e-06, "loss": 0.488, "step": 10382 }, { "epoch": 1.32, "grad_norm": 0.6094514859095329, "learning_rate": 2.71677589053358e-06, "loss": 0.424, "step": 10383 }, { "epoch": 1.32, "grad_norm": 0.7362013402079931, "learning_rate": 2.7158581893971504e-06, "loss": 0.4554, "step": 10384 }, { "epoch": 1.32, "grad_norm": 0.7158430907069553, "learning_rate": 2.714940585489338e-06, "loss": 0.541, "step": 10385 }, { "epoch": 1.32, "grad_norm": 0.7508972989594184, "learning_rate": 2.714023078849203e-06, "loss": 0.5434, "step": 10386 }, { "epoch": 1.32, "grad_norm": 0.6837608999964359, "learning_rate": 2.7131056695158033e-06, "loss": 0.493, "step": 10387 }, { "epoch": 1.32, "grad_norm": 0.8794385664082642, "learning_rate": 2.7121883575281844e-06, "loss": 0.5156, "step": 10388 }, { "epoch": 1.32, "grad_norm": 0.8270004047901359, "learning_rate": 2.7112711429253987e-06, "loss": 0.5802, "step": 10389 }, { "epoch": 1.32, "grad_norm": 0.852907495806386, "learning_rate": 2.7103540257464845e-06, "loss": 0.519, "step": 10390 }, { "epoch": 1.32, "grad_norm": 0.6275563166804927, "learning_rate": 2.709437006030485e-06, "loss": 0.5077, "step": 10391 }, { "epoch": 1.32, "grad_norm": 0.7216492105876493, "learning_rate": 2.7085200838164306e-06, "loss": 0.5058, "step": 10392 }, { "epoch": 1.32, "grad_norm": 0.7367917113004402, "learning_rate": 2.7076032591433554e-06, "loss": 0.5284, "step": 10393 }, { "epoch": 1.32, "grad_norm": 1.3768037595849074, "learning_rate": 2.706686532050282e-06, "loss": 0.527, "step": 10394 }, { "epoch": 1.32, "grad_norm": 0.7261575510917068, "learning_rate": 2.7057699025762372e-06, "loss": 0.5497, "step": 10395 }, { "epoch": 1.32, "grad_norm": 0.7461490466559105, "learning_rate": 2.7048533707602343e-06, "loss": 0.5054, "step": 10396 }, { "epoch": 1.32, "grad_norm": 0.5935209370164812, "learning_rate": 2.7039369366412906e-06, "loss": 0.3943, "step": 10397 }, { "epoch": 1.32, "grad_norm": 0.5945092786602868, "learning_rate": 2.7030206002584102e-06, "loss": 0.4446, "step": 10398 }, { "epoch": 1.32, "grad_norm": 1.3263798578125456, "learning_rate": 2.7021043616506073e-06, "loss": 0.5199, "step": 10399 }, { "epoch": 1.32, "grad_norm": 0.6861515727606337, "learning_rate": 2.7011882208568764e-06, "loss": 0.4987, "step": 10400 }, { "epoch": 1.33, "grad_norm": 0.7205811719774767, "learning_rate": 2.7002721779162194e-06, "loss": 0.5265, "step": 10401 }, { "epoch": 1.33, "grad_norm": 0.7385668990841435, "learning_rate": 2.6993562328676247e-06, "loss": 0.5232, "step": 10402 }, { "epoch": 1.33, "grad_norm": 0.73489183386022, "learning_rate": 2.698440385750085e-06, "loss": 0.4837, "step": 10403 }, { "epoch": 1.33, "grad_norm": 0.595810960905717, "learning_rate": 2.6975246366025822e-06, "loss": 0.4371, "step": 10404 }, { "epoch": 1.33, "grad_norm": 0.60407533029049, "learning_rate": 2.6966089854640987e-06, "loss": 0.4683, "step": 10405 }, { "epoch": 1.33, "grad_norm": 0.6359714362799845, "learning_rate": 2.695693432373609e-06, "loss": 0.42, "step": 10406 }, { "epoch": 1.33, "grad_norm": 0.6396203805091751, "learning_rate": 2.694777977370088e-06, "loss": 0.4969, "step": 10407 }, { "epoch": 1.33, "grad_norm": 0.7495227349500316, "learning_rate": 2.6938626204925e-06, "loss": 0.5232, "step": 10408 }, { "epoch": 1.33, "grad_norm": 0.6957898811146143, "learning_rate": 2.692947361779813e-06, "loss": 0.4519, "step": 10409 }, { "epoch": 1.33, "grad_norm": 0.6135722733042351, "learning_rate": 2.6920322012709832e-06, "loss": 0.5092, "step": 10410 }, { "epoch": 1.33, "grad_norm": 0.8106406537348229, "learning_rate": 2.691117139004966e-06, "loss": 0.5211, "step": 10411 }, { "epoch": 1.33, "grad_norm": 0.7911049720073707, "learning_rate": 2.6902021750207176e-06, "loss": 0.5253, "step": 10412 }, { "epoch": 1.33, "grad_norm": 0.590970404960969, "learning_rate": 2.689287309357179e-06, "loss": 0.4332, "step": 10413 }, { "epoch": 1.33, "grad_norm": 0.6897002819401162, "learning_rate": 2.688372542053297e-06, "loss": 0.4909, "step": 10414 }, { "epoch": 1.33, "grad_norm": 0.6949444258759422, "learning_rate": 2.6874578731480072e-06, "loss": 0.4939, "step": 10415 }, { "epoch": 1.33, "grad_norm": 0.5653657736103492, "learning_rate": 2.6865433026802485e-06, "loss": 0.4331, "step": 10416 }, { "epoch": 1.33, "grad_norm": 0.6137579517920647, "learning_rate": 2.6856288306889465e-06, "loss": 0.4235, "step": 10417 }, { "epoch": 1.33, "grad_norm": 0.5642045045712887, "learning_rate": 2.684714457213029e-06, "loss": 0.4155, "step": 10418 }, { "epoch": 1.33, "grad_norm": 0.6865290335860689, "learning_rate": 2.6838001822914205e-06, "loss": 0.4674, "step": 10419 }, { "epoch": 1.33, "grad_norm": 0.6881986360685356, "learning_rate": 2.6828860059630355e-06, "loss": 0.4622, "step": 10420 }, { "epoch": 1.33, "grad_norm": 0.6616896634586286, "learning_rate": 2.68197192826679e-06, "loss": 0.4406, "step": 10421 }, { "epoch": 1.33, "grad_norm": 0.6154911590713704, "learning_rate": 2.6810579492415906e-06, "loss": 0.4032, "step": 10422 }, { "epoch": 1.33, "grad_norm": 0.6174447590466567, "learning_rate": 2.6801440689263438e-06, "loss": 0.4651, "step": 10423 }, { "epoch": 1.33, "grad_norm": 0.6742644993561953, "learning_rate": 2.679230287359953e-06, "loss": 0.4749, "step": 10424 }, { "epoch": 1.33, "grad_norm": 0.6392911119217446, "learning_rate": 2.67831660458131e-06, "loss": 0.4367, "step": 10425 }, { "epoch": 1.33, "grad_norm": 0.6761821235044461, "learning_rate": 2.6774030206293132e-06, "loss": 0.4819, "step": 10426 }, { "epoch": 1.33, "grad_norm": 0.5601652227907855, "learning_rate": 2.6764895355428456e-06, "loss": 0.4231, "step": 10427 }, { "epoch": 1.33, "grad_norm": 1.1323072996383916, "learning_rate": 2.675576149360795e-06, "loss": 0.498, "step": 10428 }, { "epoch": 1.33, "grad_norm": 0.6688631409101287, "learning_rate": 2.674662862122038e-06, "loss": 0.4573, "step": 10429 }, { "epoch": 1.33, "grad_norm": 0.5925262351078397, "learning_rate": 2.673749673865455e-06, "loss": 0.4537, "step": 10430 }, { "epoch": 1.33, "grad_norm": 0.6078301519512345, "learning_rate": 2.672836584629912e-06, "loss": 0.4297, "step": 10431 }, { "epoch": 1.33, "grad_norm": 0.6043560196132154, "learning_rate": 2.6719235944542816e-06, "loss": 0.4425, "step": 10432 }, { "epoch": 1.33, "grad_norm": 0.6145893193107074, "learning_rate": 2.6710107033774225e-06, "loss": 0.4801, "step": 10433 }, { "epoch": 1.33, "grad_norm": 0.7046347501259793, "learning_rate": 2.6700979114381966e-06, "loss": 0.5022, "step": 10434 }, { "epoch": 1.33, "grad_norm": 0.955439941567224, "learning_rate": 2.6691852186754535e-06, "loss": 0.5311, "step": 10435 }, { "epoch": 1.33, "grad_norm": 0.7776049542735206, "learning_rate": 2.6682726251280515e-06, "loss": 0.5564, "step": 10436 }, { "epoch": 1.33, "grad_norm": 0.6946366134677137, "learning_rate": 2.6673601308348307e-06, "loss": 0.5111, "step": 10437 }, { "epoch": 1.33, "grad_norm": 0.6056262640099894, "learning_rate": 2.6664477358346375e-06, "loss": 0.4693, "step": 10438 }, { "epoch": 1.33, "grad_norm": 0.702928538440643, "learning_rate": 2.6655354401663047e-06, "loss": 0.464, "step": 10439 }, { "epoch": 1.33, "grad_norm": 0.7428536543476963, "learning_rate": 2.6646232438686704e-06, "loss": 0.5165, "step": 10440 }, { "epoch": 1.33, "grad_norm": 0.7889657852679707, "learning_rate": 2.66371114698056e-06, "loss": 0.5534, "step": 10441 }, { "epoch": 1.33, "grad_norm": 0.6419378921457135, "learning_rate": 2.6627991495408025e-06, "loss": 0.4272, "step": 10442 }, { "epoch": 1.33, "grad_norm": 0.5456671136530137, "learning_rate": 2.6618872515882143e-06, "loss": 0.4286, "step": 10443 }, { "epoch": 1.33, "grad_norm": 0.6160738899883795, "learning_rate": 2.6609754531616162e-06, "loss": 0.4285, "step": 10444 }, { "epoch": 1.33, "grad_norm": 0.6922031132322712, "learning_rate": 2.6600637542998164e-06, "loss": 0.4647, "step": 10445 }, { "epoch": 1.33, "grad_norm": 0.6074556864594911, "learning_rate": 2.6591521550416275e-06, "loss": 0.4475, "step": 10446 }, { "epoch": 1.33, "grad_norm": 0.5960269225081608, "learning_rate": 2.658240655425847e-06, "loss": 0.4518, "step": 10447 }, { "epoch": 1.33, "grad_norm": 0.6564091672395928, "learning_rate": 2.6573292554912832e-06, "loss": 0.486, "step": 10448 }, { "epoch": 1.33, "grad_norm": 0.7316632297416248, "learning_rate": 2.6564179552767245e-06, "loss": 0.4529, "step": 10449 }, { "epoch": 1.33, "grad_norm": 0.6384750104167783, "learning_rate": 2.6555067548209655e-06, "loss": 0.4736, "step": 10450 }, { "epoch": 1.33, "grad_norm": 0.9465589381693489, "learning_rate": 2.6545956541627913e-06, "loss": 0.5254, "step": 10451 }, { "epoch": 1.33, "grad_norm": 0.7444444643752165, "learning_rate": 2.6536846533409865e-06, "loss": 0.4599, "step": 10452 }, { "epoch": 1.33, "grad_norm": 0.540997955708366, "learning_rate": 2.6527737523943264e-06, "loss": 0.3976, "step": 10453 }, { "epoch": 1.33, "grad_norm": 0.6451066744950943, "learning_rate": 2.6518629513615894e-06, "loss": 0.4628, "step": 10454 }, { "epoch": 1.33, "grad_norm": 0.7762395218882295, "learning_rate": 2.650952250281541e-06, "loss": 0.4863, "step": 10455 }, { "epoch": 1.33, "grad_norm": 0.6056665589043336, "learning_rate": 2.6500416491929505e-06, "loss": 0.4591, "step": 10456 }, { "epoch": 1.33, "grad_norm": 0.9170622477340048, "learning_rate": 2.6491311481345763e-06, "loss": 0.5517, "step": 10457 }, { "epoch": 1.33, "grad_norm": 0.5882766029963012, "learning_rate": 2.6482207471451775e-06, "loss": 0.4447, "step": 10458 }, { "epoch": 1.33, "grad_norm": 0.790139805967078, "learning_rate": 2.6473104462635048e-06, "loss": 0.468, "step": 10459 }, { "epoch": 1.33, "grad_norm": 0.5435428189331143, "learning_rate": 2.646400245528309e-06, "loss": 0.4369, "step": 10460 }, { "epoch": 1.33, "grad_norm": 0.6846943788527304, "learning_rate": 2.6454901449783334e-06, "loss": 0.4924, "step": 10461 }, { "epoch": 1.33, "grad_norm": 0.9569522481544581, "learning_rate": 2.64458014465232e-06, "loss": 0.545, "step": 10462 }, { "epoch": 1.33, "grad_norm": 0.804079115785682, "learning_rate": 2.6436702445890007e-06, "loss": 0.5081, "step": 10463 }, { "epoch": 1.33, "grad_norm": 0.5884263214599849, "learning_rate": 2.6427604448271116e-06, "loss": 0.4055, "step": 10464 }, { "epoch": 1.33, "grad_norm": 0.5927593392228628, "learning_rate": 2.6418507454053764e-06, "loss": 0.4662, "step": 10465 }, { "epoch": 1.33, "grad_norm": 0.7954120688325812, "learning_rate": 2.6409411463625207e-06, "loss": 0.4887, "step": 10466 }, { "epoch": 1.33, "grad_norm": 0.7019531549203019, "learning_rate": 2.64003164773726e-06, "loss": 0.4677, "step": 10467 }, { "epoch": 1.33, "grad_norm": 0.7026126361040785, "learning_rate": 2.6391222495683134e-06, "loss": 0.4784, "step": 10468 }, { "epoch": 1.33, "grad_norm": 0.8387632757186848, "learning_rate": 2.6382129518943856e-06, "loss": 0.4801, "step": 10469 }, { "epoch": 1.33, "grad_norm": 0.8084044790296943, "learning_rate": 2.6373037547541867e-06, "loss": 0.5229, "step": 10470 }, { "epoch": 1.33, "grad_norm": 0.6055974131143065, "learning_rate": 2.6363946581864156e-06, "loss": 0.4626, "step": 10471 }, { "epoch": 1.33, "grad_norm": 0.7762961284619238, "learning_rate": 2.6354856622297707e-06, "loss": 0.5326, "step": 10472 }, { "epoch": 1.33, "grad_norm": 0.6834070519293799, "learning_rate": 2.6345767669229482e-06, "loss": 0.4981, "step": 10473 }, { "epoch": 1.33, "grad_norm": 0.653968676520899, "learning_rate": 2.633667972304631e-06, "loss": 0.435, "step": 10474 }, { "epoch": 1.33, "grad_norm": 0.6717962390423486, "learning_rate": 2.6327592784135082e-06, "loss": 0.4299, "step": 10475 }, { "epoch": 1.33, "grad_norm": 0.699017160924299, "learning_rate": 2.631850685288257e-06, "loss": 0.4893, "step": 10476 }, { "epoch": 1.33, "grad_norm": 0.6701807020173425, "learning_rate": 2.6309421929675562e-06, "loss": 0.4929, "step": 10477 }, { "epoch": 1.33, "grad_norm": 0.6229388228504292, "learning_rate": 2.630033801490074e-06, "loss": 0.4395, "step": 10478 }, { "epoch": 1.33, "grad_norm": 0.5556153721486646, "learning_rate": 2.6291255108944794e-06, "loss": 0.4388, "step": 10479 }, { "epoch": 1.34, "grad_norm": 0.8806362729927469, "learning_rate": 2.628217321219438e-06, "loss": 0.5149, "step": 10480 }, { "epoch": 1.34, "grad_norm": 0.748900621539695, "learning_rate": 2.6273092325036037e-06, "loss": 0.5093, "step": 10481 }, { "epoch": 1.34, "grad_norm": 0.7987917057961558, "learning_rate": 2.6264012447856356e-06, "loss": 0.5104, "step": 10482 }, { "epoch": 1.34, "grad_norm": 0.5360859916938244, "learning_rate": 2.625493358104179e-06, "loss": 0.4201, "step": 10483 }, { "epoch": 1.34, "grad_norm": 0.698113049700735, "learning_rate": 2.624585572497883e-06, "loss": 0.4428, "step": 10484 }, { "epoch": 1.34, "grad_norm": 0.6595825637544322, "learning_rate": 2.6236778880053903e-06, "loss": 0.4381, "step": 10485 }, { "epoch": 1.34, "grad_norm": 0.6149852131666086, "learning_rate": 2.622770304665334e-06, "loss": 0.5021, "step": 10486 }, { "epoch": 1.34, "grad_norm": 0.6687978346064894, "learning_rate": 2.621862822516351e-06, "loss": 0.4944, "step": 10487 }, { "epoch": 1.34, "grad_norm": 0.5596785788185218, "learning_rate": 2.6209554415970668e-06, "loss": 0.4276, "step": 10488 }, { "epoch": 1.34, "grad_norm": 0.6159920787070838, "learning_rate": 2.6200481619461087e-06, "loss": 0.5368, "step": 10489 }, { "epoch": 1.34, "grad_norm": 0.803250683156127, "learning_rate": 2.619140983602093e-06, "loss": 0.5126, "step": 10490 }, { "epoch": 1.34, "grad_norm": 0.7072484586432487, "learning_rate": 2.61823390660364e-06, "loss": 0.5449, "step": 10491 }, { "epoch": 1.34, "grad_norm": 0.7122089858160523, "learning_rate": 2.617326930989357e-06, "loss": 0.5384, "step": 10492 }, { "epoch": 1.34, "grad_norm": 0.764709926973671, "learning_rate": 2.6164200567978538e-06, "loss": 0.5251, "step": 10493 }, { "epoch": 1.34, "grad_norm": 0.7965000517741182, "learning_rate": 2.61551328406773e-06, "loss": 0.5426, "step": 10494 }, { "epoch": 1.34, "grad_norm": 0.5722800226789765, "learning_rate": 2.614606612837588e-06, "loss": 0.4667, "step": 10495 }, { "epoch": 1.34, "grad_norm": 0.6066833520036666, "learning_rate": 2.613700043146018e-06, "loss": 0.5074, "step": 10496 }, { "epoch": 1.34, "grad_norm": 0.540690502361817, "learning_rate": 2.612793575031611e-06, "loss": 0.4146, "step": 10497 }, { "epoch": 1.34, "grad_norm": 0.5691137692350555, "learning_rate": 2.6118872085329545e-06, "loss": 0.4446, "step": 10498 }, { "epoch": 1.34, "grad_norm": 0.660356675828652, "learning_rate": 2.610980943688629e-06, "loss": 0.4777, "step": 10499 }, { "epoch": 1.34, "grad_norm": 0.8759596562439051, "learning_rate": 2.6100747805372097e-06, "loss": 0.5395, "step": 10500 }, { "epoch": 1.34, "grad_norm": 0.5968988089047996, "learning_rate": 2.609168719117271e-06, "loss": 0.526, "step": 10501 }, { "epoch": 1.34, "grad_norm": 0.9312721545499884, "learning_rate": 2.6082627594673777e-06, "loss": 0.542, "step": 10502 }, { "epoch": 1.34, "grad_norm": 0.5907621184336649, "learning_rate": 2.607356901626098e-06, "loss": 0.4629, "step": 10503 }, { "epoch": 1.34, "grad_norm": 0.7301453061227231, "learning_rate": 2.6064511456319874e-06, "loss": 0.4549, "step": 10504 }, { "epoch": 1.34, "grad_norm": 0.6367422057780036, "learning_rate": 2.605545491523605e-06, "loss": 0.4091, "step": 10505 }, { "epoch": 1.34, "grad_norm": 0.6060024480535015, "learning_rate": 2.6046399393394973e-06, "loss": 0.4459, "step": 10506 }, { "epoch": 1.34, "grad_norm": 0.603006322669303, "learning_rate": 2.6037344891182147e-06, "loss": 0.4441, "step": 10507 }, { "epoch": 1.34, "grad_norm": 0.7097251674959564, "learning_rate": 2.602829140898296e-06, "loss": 0.4737, "step": 10508 }, { "epoch": 1.34, "grad_norm": 0.6045866969731865, "learning_rate": 2.6019238947182802e-06, "loss": 0.5126, "step": 10509 }, { "epoch": 1.34, "grad_norm": 0.6806143131924589, "learning_rate": 2.6010187506167008e-06, "loss": 0.5375, "step": 10510 }, { "epoch": 1.34, "grad_norm": 0.735141985519335, "learning_rate": 2.6001137086320895e-06, "loss": 0.5032, "step": 10511 }, { "epoch": 1.34, "grad_norm": 0.6999838503513939, "learning_rate": 2.5992087688029666e-06, "loss": 0.5049, "step": 10512 }, { "epoch": 1.34, "grad_norm": 0.6874803323596783, "learning_rate": 2.5983039311678567e-06, "loss": 0.4751, "step": 10513 }, { "epoch": 1.34, "grad_norm": 0.5926923896376749, "learning_rate": 2.597399195765271e-06, "loss": 0.4414, "step": 10514 }, { "epoch": 1.34, "grad_norm": 0.5425670683386655, "learning_rate": 2.5964945626337268e-06, "loss": 0.4205, "step": 10515 }, { "epoch": 1.34, "grad_norm": 0.6970937596512157, "learning_rate": 2.5955900318117266e-06, "loss": 0.4235, "step": 10516 }, { "epoch": 1.34, "grad_norm": 0.6205585218731938, "learning_rate": 2.5946856033377767e-06, "loss": 0.4693, "step": 10517 }, { "epoch": 1.34, "grad_norm": 0.6938979631909559, "learning_rate": 2.593781277250372e-06, "loss": 0.4685, "step": 10518 }, { "epoch": 1.34, "grad_norm": 0.7968496125303536, "learning_rate": 2.5928770535880122e-06, "loss": 0.5244, "step": 10519 }, { "epoch": 1.34, "grad_norm": 0.7664858655971052, "learning_rate": 2.591972932389182e-06, "loss": 0.5162, "step": 10520 }, { "epoch": 1.34, "grad_norm": 0.6411154348649356, "learning_rate": 2.591068913692371e-06, "loss": 0.4921, "step": 10521 }, { "epoch": 1.34, "grad_norm": 0.8168666097855334, "learning_rate": 2.5901649975360543e-06, "loss": 0.5143, "step": 10522 }, { "epoch": 1.34, "grad_norm": 0.7723265278626535, "learning_rate": 2.5892611839587175e-06, "loss": 0.5433, "step": 10523 }, { "epoch": 1.34, "grad_norm": 0.8534329527893085, "learning_rate": 2.588357472998826e-06, "loss": 0.4499, "step": 10524 }, { "epoch": 1.34, "grad_norm": 0.8478258952328214, "learning_rate": 2.587453864694852e-06, "loss": 0.5397, "step": 10525 }, { "epoch": 1.34, "grad_norm": 0.6214870427952506, "learning_rate": 2.586550359085256e-06, "loss": 0.4357, "step": 10526 }, { "epoch": 1.34, "grad_norm": 0.6422511873552887, "learning_rate": 2.5856469562085013e-06, "loss": 0.4906, "step": 10527 }, { "epoch": 1.34, "grad_norm": 0.7478310730735366, "learning_rate": 2.584743656103038e-06, "loss": 0.4967, "step": 10528 }, { "epoch": 1.34, "grad_norm": 0.6021550539295655, "learning_rate": 2.5838404588073212e-06, "loss": 0.4701, "step": 10529 }, { "epoch": 1.34, "grad_norm": 0.5786767914169088, "learning_rate": 2.5829373643597932e-06, "loss": 0.4634, "step": 10530 }, { "epoch": 1.34, "grad_norm": 0.5903349405229537, "learning_rate": 2.5820343727989e-06, "loss": 0.4783, "step": 10531 }, { "epoch": 1.34, "grad_norm": 0.7419883312138332, "learning_rate": 2.5811314841630746e-06, "loss": 0.5394, "step": 10532 }, { "epoch": 1.34, "grad_norm": 0.642325443778341, "learning_rate": 2.5802286984907544e-06, "loss": 0.4568, "step": 10533 }, { "epoch": 1.34, "grad_norm": 0.6013737178069675, "learning_rate": 2.5793260158203637e-06, "loss": 0.4121, "step": 10534 }, { "epoch": 1.34, "grad_norm": 0.6248434686109372, "learning_rate": 2.57842343619033e-06, "loss": 0.47, "step": 10535 }, { "epoch": 1.34, "grad_norm": 0.6827723721043608, "learning_rate": 2.577520959639074e-06, "loss": 0.5715, "step": 10536 }, { "epoch": 1.34, "grad_norm": 0.7917541185057193, "learning_rate": 2.576618586205007e-06, "loss": 0.5129, "step": 10537 }, { "epoch": 1.34, "grad_norm": 0.6355583024877731, "learning_rate": 2.5757163159265454e-06, "loss": 0.4837, "step": 10538 }, { "epoch": 1.34, "grad_norm": 0.6567955558681297, "learning_rate": 2.574814148842091e-06, "loss": 0.4935, "step": 10539 }, { "epoch": 1.34, "grad_norm": 0.8557123190373086, "learning_rate": 2.5739120849900488e-06, "loss": 0.4954, "step": 10540 }, { "epoch": 1.34, "grad_norm": 0.7187077331673724, "learning_rate": 2.573010124408818e-06, "loss": 0.4767, "step": 10541 }, { "epoch": 1.34, "grad_norm": 0.7783758985801082, "learning_rate": 2.572108267136789e-06, "loss": 0.5312, "step": 10542 }, { "epoch": 1.34, "grad_norm": 0.7522295070272268, "learning_rate": 2.5712065132123544e-06, "loss": 0.5364, "step": 10543 }, { "epoch": 1.34, "grad_norm": 0.6587259348762554, "learning_rate": 2.570304862673896e-06, "loss": 0.4672, "step": 10544 }, { "epoch": 1.34, "grad_norm": 0.8102807501813335, "learning_rate": 2.5694033155597962e-06, "loss": 0.4768, "step": 10545 }, { "epoch": 1.34, "grad_norm": 0.7204510924799229, "learning_rate": 2.5685018719084286e-06, "loss": 0.5349, "step": 10546 }, { "epoch": 1.34, "grad_norm": 0.6724871736736747, "learning_rate": 2.567600531758167e-06, "loss": 0.4485, "step": 10547 }, { "epoch": 1.34, "grad_norm": 0.8287670774381877, "learning_rate": 2.566699295147379e-06, "loss": 0.5701, "step": 10548 }, { "epoch": 1.34, "grad_norm": 0.7504033198606173, "learning_rate": 2.5657981621144252e-06, "loss": 0.5133, "step": 10549 }, { "epoch": 1.34, "grad_norm": 0.6510095384940816, "learning_rate": 2.5648971326976667e-06, "loss": 0.4398, "step": 10550 }, { "epoch": 1.34, "grad_norm": 0.7776999421851082, "learning_rate": 2.5639962069354534e-06, "loss": 0.5608, "step": 10551 }, { "epoch": 1.34, "grad_norm": 0.8928014711124543, "learning_rate": 2.56309538486614e-06, "loss": 0.5259, "step": 10552 }, { "epoch": 1.34, "grad_norm": 0.6441973623516792, "learning_rate": 2.5621946665280663e-06, "loss": 0.5076, "step": 10553 }, { "epoch": 1.34, "grad_norm": 0.7301758034332348, "learning_rate": 2.5612940519595774e-06, "loss": 0.4537, "step": 10554 }, { "epoch": 1.34, "grad_norm": 0.6560502718525917, "learning_rate": 2.5603935411990056e-06, "loss": 0.4863, "step": 10555 }, { "epoch": 1.34, "grad_norm": 1.0806354582198998, "learning_rate": 2.5594931342846874e-06, "loss": 0.5566, "step": 10556 }, { "epoch": 1.34, "grad_norm": 0.9032036502313324, "learning_rate": 2.558592831254946e-06, "loss": 0.5064, "step": 10557 }, { "epoch": 1.35, "grad_norm": 0.6583914367167848, "learning_rate": 2.557692632148107e-06, "loss": 0.4844, "step": 10558 }, { "epoch": 1.35, "grad_norm": 0.55842124426622, "learning_rate": 2.556792537002485e-06, "loss": 0.4443, "step": 10559 }, { "epoch": 1.35, "grad_norm": 0.7925442216745742, "learning_rate": 2.555892545856401e-06, "loss": 0.4937, "step": 10560 }, { "epoch": 1.35, "grad_norm": 0.6272378172920655, "learning_rate": 2.5549926587481596e-06, "loss": 0.4897, "step": 10561 }, { "epoch": 1.35, "grad_norm": 0.5655246931149864, "learning_rate": 2.554092875716069e-06, "loss": 0.4677, "step": 10562 }, { "epoch": 1.35, "grad_norm": 0.683200082924845, "learning_rate": 2.553193196798427e-06, "loss": 0.5002, "step": 10563 }, { "epoch": 1.35, "grad_norm": 0.6882389962346854, "learning_rate": 2.5522936220335337e-06, "loss": 0.4934, "step": 10564 }, { "epoch": 1.35, "grad_norm": 0.7629533289317983, "learning_rate": 2.5513941514596766e-06, "loss": 0.5429, "step": 10565 }, { "epoch": 1.35, "grad_norm": 0.7484197632077519, "learning_rate": 2.5504947851151486e-06, "loss": 0.5382, "step": 10566 }, { "epoch": 1.35, "grad_norm": 0.7874615109126454, "learning_rate": 2.5495955230382275e-06, "loss": 0.5133, "step": 10567 }, { "epoch": 1.35, "grad_norm": 0.6060312838473805, "learning_rate": 2.548696365267197e-06, "loss": 0.4946, "step": 10568 }, { "epoch": 1.35, "grad_norm": 0.6841542014755738, "learning_rate": 2.547797311840327e-06, "loss": 0.5041, "step": 10569 }, { "epoch": 1.35, "grad_norm": 0.635146392666409, "learning_rate": 2.546898362795891e-06, "loss": 0.4888, "step": 10570 }, { "epoch": 1.35, "grad_norm": 0.5969763884589779, "learning_rate": 2.5459995181721493e-06, "loss": 0.4823, "step": 10571 }, { "epoch": 1.35, "grad_norm": 0.6279444187968927, "learning_rate": 2.54510077800737e-06, "loss": 0.4376, "step": 10572 }, { "epoch": 1.35, "grad_norm": 0.7474066497095003, "learning_rate": 2.5442021423398034e-06, "loss": 0.4676, "step": 10573 }, { "epoch": 1.35, "grad_norm": 0.8083824948346612, "learning_rate": 2.5433036112077064e-06, "loss": 0.4662, "step": 10574 }, { "epoch": 1.35, "grad_norm": 0.6992772934404534, "learning_rate": 2.5424051846493225e-06, "loss": 0.4861, "step": 10575 }, { "epoch": 1.35, "grad_norm": 0.8115358118946803, "learning_rate": 2.541506862702898e-06, "loss": 0.5079, "step": 10576 }, { "epoch": 1.35, "grad_norm": 0.5898076156590896, "learning_rate": 2.540608645406668e-06, "loss": 0.4876, "step": 10577 }, { "epoch": 1.35, "grad_norm": 0.6591181353526833, "learning_rate": 2.539710532798871e-06, "loss": 0.432, "step": 10578 }, { "epoch": 1.35, "grad_norm": 0.6483357111405805, "learning_rate": 2.538812524917733e-06, "loss": 0.4906, "step": 10579 }, { "epoch": 1.35, "grad_norm": 0.7201600310027874, "learning_rate": 2.5379146218014828e-06, "loss": 0.5091, "step": 10580 }, { "epoch": 1.35, "grad_norm": 0.7601836067444325, "learning_rate": 2.5370168234883375e-06, "loss": 0.5357, "step": 10581 }, { "epoch": 1.35, "grad_norm": 0.6314016482768607, "learning_rate": 2.536119130016518e-06, "loss": 0.51, "step": 10582 }, { "epoch": 1.35, "grad_norm": 0.6448711450255533, "learning_rate": 2.535221541424231e-06, "loss": 0.5138, "step": 10583 }, { "epoch": 1.35, "grad_norm": 0.7122061061104349, "learning_rate": 2.5343240577496864e-06, "loss": 0.5563, "step": 10584 }, { "epoch": 1.35, "grad_norm": 0.8436835601495344, "learning_rate": 2.533426679031088e-06, "loss": 0.5324, "step": 10585 }, { "epoch": 1.35, "grad_norm": 0.5453104602450376, "learning_rate": 2.532529405306636e-06, "loss": 0.4452, "step": 10586 }, { "epoch": 1.35, "grad_norm": 0.6346023638934389, "learning_rate": 2.5316322366145195e-06, "loss": 0.4616, "step": 10587 }, { "epoch": 1.35, "grad_norm": 0.7173474582293952, "learning_rate": 2.5307351729929334e-06, "loss": 0.4771, "step": 10588 }, { "epoch": 1.35, "grad_norm": 0.5371831843226925, "learning_rate": 2.5298382144800583e-06, "loss": 0.4291, "step": 10589 }, { "epoch": 1.35, "grad_norm": 0.7471570013389153, "learning_rate": 2.528941361114079e-06, "loss": 0.5323, "step": 10590 }, { "epoch": 1.35, "grad_norm": 0.7050985320255504, "learning_rate": 2.5280446129331675e-06, "loss": 0.4929, "step": 10591 }, { "epoch": 1.35, "grad_norm": 0.6876108715515993, "learning_rate": 2.5271479699754996e-06, "loss": 0.487, "step": 10592 }, { "epoch": 1.35, "grad_norm": 0.8497204672920528, "learning_rate": 2.5262514322792387e-06, "loss": 0.5103, "step": 10593 }, { "epoch": 1.35, "grad_norm": 0.7966053083109497, "learning_rate": 2.525354999882551e-06, "loss": 0.5476, "step": 10594 }, { "epoch": 1.35, "grad_norm": 0.6943922207606278, "learning_rate": 2.524458672823592e-06, "loss": 0.5145, "step": 10595 }, { "epoch": 1.35, "grad_norm": 0.7621090276845341, "learning_rate": 2.5235624511405153e-06, "loss": 0.5201, "step": 10596 }, { "epoch": 1.35, "grad_norm": 0.6480220236340348, "learning_rate": 2.522666334871474e-06, "loss": 0.4302, "step": 10597 }, { "epoch": 1.35, "grad_norm": 0.5352004794161997, "learning_rate": 2.5217703240546087e-06, "loss": 0.4377, "step": 10598 }, { "epoch": 1.35, "grad_norm": 0.6158427016130312, "learning_rate": 2.520874418728063e-06, "loss": 0.4572, "step": 10599 }, { "epoch": 1.35, "grad_norm": 0.6290232580546541, "learning_rate": 2.5199786189299703e-06, "loss": 0.4678, "step": 10600 }, { "epoch": 1.35, "grad_norm": 0.8433508395950833, "learning_rate": 2.519082924698464e-06, "loss": 0.5943, "step": 10601 }, { "epoch": 1.35, "grad_norm": 0.7075211559000786, "learning_rate": 2.5181873360716667e-06, "loss": 0.5505, "step": 10602 }, { "epoch": 1.35, "grad_norm": 0.7983628484356027, "learning_rate": 2.5172918530877048e-06, "loss": 0.5217, "step": 10603 }, { "epoch": 1.35, "grad_norm": 0.6361455888035236, "learning_rate": 2.516396475784697e-06, "loss": 0.4441, "step": 10604 }, { "epoch": 1.35, "grad_norm": 0.6358294421365027, "learning_rate": 2.515501204200753e-06, "loss": 0.4485, "step": 10605 }, { "epoch": 1.35, "grad_norm": 0.6299572756032057, "learning_rate": 2.514606038373985e-06, "loss": 0.4884, "step": 10606 }, { "epoch": 1.35, "grad_norm": 0.7507582170250209, "learning_rate": 2.513710978342494e-06, "loss": 0.4692, "step": 10607 }, { "epoch": 1.35, "grad_norm": 0.6674577672579849, "learning_rate": 2.512816024144382e-06, "loss": 0.4979, "step": 10608 }, { "epoch": 1.35, "grad_norm": 0.7385789629712277, "learning_rate": 2.5119211758177454e-06, "loss": 0.5023, "step": 10609 }, { "epoch": 1.35, "grad_norm": 0.7671958718927128, "learning_rate": 2.5110264334006714e-06, "loss": 0.524, "step": 10610 }, { "epoch": 1.35, "grad_norm": 1.1830286879294056, "learning_rate": 2.5101317969312505e-06, "loss": 0.4849, "step": 10611 }, { "epoch": 1.35, "grad_norm": 0.6926632994338192, "learning_rate": 2.5092372664475607e-06, "loss": 0.4315, "step": 10612 }, { "epoch": 1.35, "grad_norm": 0.6506380234794855, "learning_rate": 2.5083428419876833e-06, "loss": 0.4335, "step": 10613 }, { "epoch": 1.35, "grad_norm": 0.6677770486213096, "learning_rate": 2.5074485235896873e-06, "loss": 0.4615, "step": 10614 }, { "epoch": 1.35, "grad_norm": 0.6422924304526997, "learning_rate": 2.5065543112916434e-06, "loss": 0.396, "step": 10615 }, { "epoch": 1.35, "grad_norm": 0.6540009378319542, "learning_rate": 2.5056602051316136e-06, "loss": 0.4973, "step": 10616 }, { "epoch": 1.35, "grad_norm": 0.7575700931995678, "learning_rate": 2.5047662051476597e-06, "loss": 0.5024, "step": 10617 }, { "epoch": 1.35, "grad_norm": 0.6582059655711523, "learning_rate": 2.503872311377833e-06, "loss": 0.4495, "step": 10618 }, { "epoch": 1.35, "grad_norm": 0.6696795396076224, "learning_rate": 2.502978523860187e-06, "loss": 0.4741, "step": 10619 }, { "epoch": 1.35, "grad_norm": 0.6435913695776708, "learning_rate": 2.502084842632765e-06, "loss": 0.483, "step": 10620 }, { "epoch": 1.35, "grad_norm": 0.730353290183102, "learning_rate": 2.501191267733608e-06, "loss": 0.4844, "step": 10621 }, { "epoch": 1.35, "grad_norm": 0.6335069027394884, "learning_rate": 2.5002977992007538e-06, "loss": 0.4521, "step": 10622 }, { "epoch": 1.35, "grad_norm": 0.6154280973499405, "learning_rate": 2.499404437072237e-06, "loss": 0.4934, "step": 10623 }, { "epoch": 1.35, "grad_norm": 0.5850241018083346, "learning_rate": 2.4985111813860803e-06, "loss": 0.4782, "step": 10624 }, { "epoch": 1.35, "grad_norm": 0.6933493158022256, "learning_rate": 2.4976180321803107e-06, "loss": 0.4494, "step": 10625 }, { "epoch": 1.35, "grad_norm": 0.6635587136374863, "learning_rate": 2.4967249894929436e-06, "loss": 0.4941, "step": 10626 }, { "epoch": 1.35, "grad_norm": 0.625311579433706, "learning_rate": 2.4958320533619956e-06, "loss": 0.4488, "step": 10627 }, { "epoch": 1.35, "grad_norm": 0.6327476384586453, "learning_rate": 2.494939223825473e-06, "loss": 0.4513, "step": 10628 }, { "epoch": 1.35, "grad_norm": 0.773105806783291, "learning_rate": 2.494046500921385e-06, "loss": 0.5474, "step": 10629 }, { "epoch": 1.35, "grad_norm": 0.6091867867165777, "learning_rate": 2.4931538846877278e-06, "loss": 0.4876, "step": 10630 }, { "epoch": 1.35, "grad_norm": 0.7491279739456798, "learning_rate": 2.4922613751625003e-06, "loss": 0.4838, "step": 10631 }, { "epoch": 1.35, "grad_norm": 0.6898837828551352, "learning_rate": 2.4913689723836908e-06, "loss": 0.4972, "step": 10632 }, { "epoch": 1.35, "grad_norm": 0.6778226639889613, "learning_rate": 2.490476676389288e-06, "loss": 0.5227, "step": 10633 }, { "epoch": 1.35, "grad_norm": 0.7018927983205488, "learning_rate": 2.4895844872172737e-06, "loss": 0.5119, "step": 10634 }, { "epoch": 1.35, "grad_norm": 0.8155583147329938, "learning_rate": 2.4886924049056273e-06, "loss": 0.5657, "step": 10635 }, { "epoch": 1.35, "grad_norm": 0.8061910783214565, "learning_rate": 2.4878004294923187e-06, "loss": 0.4525, "step": 10636 }, { "epoch": 1.36, "grad_norm": 0.5574081462277762, "learning_rate": 2.4869085610153193e-06, "loss": 0.4306, "step": 10637 }, { "epoch": 1.36, "grad_norm": 0.6670760635317258, "learning_rate": 2.48601679951259e-06, "loss": 0.4747, "step": 10638 }, { "epoch": 1.36, "grad_norm": 0.570575085827016, "learning_rate": 2.485125145022094e-06, "loss": 0.4678, "step": 10639 }, { "epoch": 1.36, "grad_norm": 0.7311714371413807, "learning_rate": 2.4842335975817825e-06, "loss": 0.5276, "step": 10640 }, { "epoch": 1.36, "grad_norm": 0.7817605699699901, "learning_rate": 2.4833421572296094e-06, "loss": 0.5376, "step": 10641 }, { "epoch": 1.36, "grad_norm": 0.7084791512606515, "learning_rate": 2.4824508240035167e-06, "loss": 0.459, "step": 10642 }, { "epoch": 1.36, "grad_norm": 0.5426925526301956, "learning_rate": 2.481559597941449e-06, "loss": 0.4346, "step": 10643 }, { "epoch": 1.36, "grad_norm": 0.7486718158103108, "learning_rate": 2.48066847908134e-06, "loss": 0.4588, "step": 10644 }, { "epoch": 1.36, "grad_norm": 0.7632464265884233, "learning_rate": 2.479777467461125e-06, "loss": 0.4987, "step": 10645 }, { "epoch": 1.36, "grad_norm": 0.6019727398459266, "learning_rate": 2.478886563118726e-06, "loss": 0.4955, "step": 10646 }, { "epoch": 1.36, "grad_norm": 0.5631170630230982, "learning_rate": 2.477995766092073e-06, "loss": 0.4184, "step": 10647 }, { "epoch": 1.36, "grad_norm": 0.6501602303015634, "learning_rate": 2.4771050764190795e-06, "loss": 0.4438, "step": 10648 }, { "epoch": 1.36, "grad_norm": 0.6297624260295119, "learning_rate": 2.4762144941376627e-06, "loss": 0.457, "step": 10649 }, { "epoch": 1.36, "grad_norm": 0.604528205843405, "learning_rate": 2.475324019285728e-06, "loss": 0.4524, "step": 10650 }, { "epoch": 1.36, "grad_norm": 0.5504661044380628, "learning_rate": 2.4744336519011848e-06, "loss": 0.4308, "step": 10651 }, { "epoch": 1.36, "grad_norm": 0.7918434571931015, "learning_rate": 2.4735433920219275e-06, "loss": 0.4656, "step": 10652 }, { "epoch": 1.36, "grad_norm": 0.5493917510346389, "learning_rate": 2.472653239685857e-06, "loss": 0.3887, "step": 10653 }, { "epoch": 1.36, "grad_norm": 0.5945150597257793, "learning_rate": 2.4717631949308603e-06, "loss": 0.4757, "step": 10654 }, { "epoch": 1.36, "grad_norm": 0.7060958545896473, "learning_rate": 2.4708732577948263e-06, "loss": 0.5, "step": 10655 }, { "epoch": 1.36, "grad_norm": 0.530397584421947, "learning_rate": 2.4699834283156342e-06, "loss": 0.4229, "step": 10656 }, { "epoch": 1.36, "grad_norm": 0.7672624101885411, "learning_rate": 2.4690937065311647e-06, "loss": 0.506, "step": 10657 }, { "epoch": 1.36, "grad_norm": 0.7874569311621559, "learning_rate": 2.468204092479287e-06, "loss": 0.5733, "step": 10658 }, { "epoch": 1.36, "grad_norm": 0.8460276648134271, "learning_rate": 2.4673145861978705e-06, "loss": 0.5846, "step": 10659 }, { "epoch": 1.36, "grad_norm": 0.7828687831319213, "learning_rate": 2.466425187724781e-06, "loss": 0.5239, "step": 10660 }, { "epoch": 1.36, "grad_norm": 0.7700224106343307, "learning_rate": 2.465535897097872e-06, "loss": 0.4734, "step": 10661 }, { "epoch": 1.36, "grad_norm": 0.5346197722840267, "learning_rate": 2.464646714355004e-06, "loss": 0.4442, "step": 10662 }, { "epoch": 1.36, "grad_norm": 0.6108654957329758, "learning_rate": 2.4637576395340214e-06, "loss": 0.4543, "step": 10663 }, { "epoch": 1.36, "grad_norm": 0.7150591385887055, "learning_rate": 2.4628686726727708e-06, "loss": 0.5846, "step": 10664 }, { "epoch": 1.36, "grad_norm": 0.7584122496864705, "learning_rate": 2.4619798138090954e-06, "loss": 0.5119, "step": 10665 }, { "epoch": 1.36, "grad_norm": 0.7356999300457018, "learning_rate": 2.4610910629808273e-06, "loss": 0.4591, "step": 10666 }, { "epoch": 1.36, "grad_norm": 0.6135349839298728, "learning_rate": 2.4602024202258013e-06, "loss": 0.5139, "step": 10667 }, { "epoch": 1.36, "grad_norm": 0.801710744306865, "learning_rate": 2.4593138855818407e-06, "loss": 0.5061, "step": 10668 }, { "epoch": 1.36, "grad_norm": 0.6001626031433973, "learning_rate": 2.4584254590867702e-06, "loss": 0.5057, "step": 10669 }, { "epoch": 1.36, "grad_norm": 0.7235302494936137, "learning_rate": 2.457537140778405e-06, "loss": 0.5199, "step": 10670 }, { "epoch": 1.36, "grad_norm": 0.603529320577914, "learning_rate": 2.456648930694559e-06, "loss": 0.4864, "step": 10671 }, { "epoch": 1.36, "grad_norm": 0.6812204286898121, "learning_rate": 2.4557608288730424e-06, "loss": 0.5462, "step": 10672 }, { "epoch": 1.36, "grad_norm": 0.7765607089730986, "learning_rate": 2.454872835351655e-06, "loss": 0.4848, "step": 10673 }, { "epoch": 1.36, "grad_norm": 0.7113788664381167, "learning_rate": 2.4539849501682e-06, "loss": 0.4204, "step": 10674 }, { "epoch": 1.36, "grad_norm": 0.6469614434228078, "learning_rate": 2.453097173360468e-06, "loss": 0.452, "step": 10675 }, { "epoch": 1.36, "grad_norm": 0.6888606628738865, "learning_rate": 2.4522095049662526e-06, "loss": 0.4846, "step": 10676 }, { "epoch": 1.36, "grad_norm": 0.7524738566659702, "learning_rate": 2.451321945023335e-06, "loss": 0.5014, "step": 10677 }, { "epoch": 1.36, "grad_norm": 0.6921572770796952, "learning_rate": 2.4504344935694996e-06, "loss": 0.5261, "step": 10678 }, { "epoch": 1.36, "grad_norm": 0.7199160789820862, "learning_rate": 2.4495471506425183e-06, "loss": 0.4785, "step": 10679 }, { "epoch": 1.36, "grad_norm": 0.6071135882596962, "learning_rate": 2.4486599162801674e-06, "loss": 0.4319, "step": 10680 }, { "epoch": 1.36, "grad_norm": 0.5683515544675386, "learning_rate": 2.4477727905202085e-06, "loss": 0.3997, "step": 10681 }, { "epoch": 1.36, "grad_norm": 0.5807853982951335, "learning_rate": 2.446885773400408e-06, "loss": 0.4772, "step": 10682 }, { "epoch": 1.36, "grad_norm": 0.6791670099245443, "learning_rate": 2.4459988649585182e-06, "loss": 0.4622, "step": 10683 }, { "epoch": 1.36, "grad_norm": 0.6014193527221852, "learning_rate": 2.445112065232299e-06, "loss": 0.4926, "step": 10684 }, { "epoch": 1.36, "grad_norm": 0.6813237596867474, "learning_rate": 2.444225374259493e-06, "loss": 0.4753, "step": 10685 }, { "epoch": 1.36, "grad_norm": 0.8235857899859498, "learning_rate": 2.4433387920778475e-06, "loss": 0.5193, "step": 10686 }, { "epoch": 1.36, "grad_norm": 0.7172004316474528, "learning_rate": 2.4424523187250974e-06, "loss": 0.4809, "step": 10687 }, { "epoch": 1.36, "grad_norm": 1.4542885268455246, "learning_rate": 2.441565954238982e-06, "loss": 0.4996, "step": 10688 }, { "epoch": 1.36, "grad_norm": 0.7140273438417358, "learning_rate": 2.4406796986572256e-06, "loss": 0.5256, "step": 10689 }, { "epoch": 1.36, "grad_norm": 0.5816882506093489, "learning_rate": 2.439793552017559e-06, "loss": 0.4375, "step": 10690 }, { "epoch": 1.36, "grad_norm": 0.6688746931103752, "learning_rate": 2.438907514357697e-06, "loss": 0.4691, "step": 10691 }, { "epoch": 1.36, "grad_norm": 0.7911759252836437, "learning_rate": 2.43802158571536e-06, "loss": 0.4808, "step": 10692 }, { "epoch": 1.36, "grad_norm": 0.6997345922810209, "learning_rate": 2.437135766128255e-06, "loss": 0.441, "step": 10693 }, { "epoch": 1.36, "grad_norm": 0.7697127989619299, "learning_rate": 2.4362500556340936e-06, "loss": 0.5065, "step": 10694 }, { "epoch": 1.36, "grad_norm": 0.76261356575478, "learning_rate": 2.4353644542705703e-06, "loss": 0.4966, "step": 10695 }, { "epoch": 1.36, "grad_norm": 0.5992984772511316, "learning_rate": 2.43447896207539e-06, "loss": 0.5088, "step": 10696 }, { "epoch": 1.36, "grad_norm": 0.7693345937281759, "learning_rate": 2.4335935790862406e-06, "loss": 0.5733, "step": 10697 }, { "epoch": 1.36, "grad_norm": 0.7374141415594203, "learning_rate": 2.432708305340814e-06, "loss": 0.5348, "step": 10698 }, { "epoch": 1.36, "grad_norm": 0.6752949821992592, "learning_rate": 2.431823140876788e-06, "loss": 0.4986, "step": 10699 }, { "epoch": 1.36, "grad_norm": 0.5494342031098339, "learning_rate": 2.430938085731847e-06, "loss": 0.4069, "step": 10700 }, { "epoch": 1.36, "grad_norm": 0.6345041032562396, "learning_rate": 2.43005313994366e-06, "loss": 0.4874, "step": 10701 }, { "epoch": 1.36, "grad_norm": 0.7599971000903185, "learning_rate": 2.4291683035499004e-06, "loss": 0.5238, "step": 10702 }, { "epoch": 1.36, "grad_norm": 0.7380009544892384, "learning_rate": 2.4282835765882286e-06, "loss": 0.5222, "step": 10703 }, { "epoch": 1.36, "grad_norm": 0.7670832887018235, "learning_rate": 2.4273989590963093e-06, "loss": 0.5281, "step": 10704 }, { "epoch": 1.36, "grad_norm": 0.5904091697497313, "learning_rate": 2.426514451111793e-06, "loss": 0.473, "step": 10705 }, { "epoch": 1.36, "grad_norm": 0.6331370955412378, "learning_rate": 2.4256300526723352e-06, "loss": 0.4478, "step": 10706 }, { "epoch": 1.36, "grad_norm": 0.6468386643533353, "learning_rate": 2.4247457638155775e-06, "loss": 0.4845, "step": 10707 }, { "epoch": 1.36, "grad_norm": 0.6339230424769525, "learning_rate": 2.4238615845791636e-06, "loss": 0.5139, "step": 10708 }, { "epoch": 1.36, "grad_norm": 0.8816016484557027, "learning_rate": 2.4229775150007295e-06, "loss": 0.5033, "step": 10709 }, { "epoch": 1.36, "grad_norm": 0.6510093955007307, "learning_rate": 2.422093555117909e-06, "loss": 0.4568, "step": 10710 }, { "epoch": 1.36, "grad_norm": 0.5889514282845364, "learning_rate": 2.421209704968327e-06, "loss": 0.4352, "step": 10711 }, { "epoch": 1.36, "grad_norm": 0.6342082783199369, "learning_rate": 2.420325964589609e-06, "loss": 0.4202, "step": 10712 }, { "epoch": 1.36, "grad_norm": 0.6272517155245021, "learning_rate": 2.4194423340193696e-06, "loss": 0.5029, "step": 10713 }, { "epoch": 1.36, "grad_norm": 0.7838106229972467, "learning_rate": 2.418558813295225e-06, "loss": 0.484, "step": 10714 }, { "epoch": 1.37, "grad_norm": 0.939730119450188, "learning_rate": 2.417675402454781e-06, "loss": 0.5929, "step": 10715 }, { "epoch": 1.37, "grad_norm": 0.730871296229932, "learning_rate": 2.416792101535645e-06, "loss": 0.5235, "step": 10716 }, { "epoch": 1.37, "grad_norm": 0.5420394027454228, "learning_rate": 2.4159089105754123e-06, "loss": 0.4323, "step": 10717 }, { "epoch": 1.37, "grad_norm": 0.5699369533277572, "learning_rate": 2.4150258296116825e-06, "loss": 0.4385, "step": 10718 }, { "epoch": 1.37, "grad_norm": 0.6008010964580018, "learning_rate": 2.41414285868204e-06, "loss": 0.456, "step": 10719 }, { "epoch": 1.37, "grad_norm": 0.6243355450781678, "learning_rate": 2.4132599978240727e-06, "loss": 0.4591, "step": 10720 }, { "epoch": 1.37, "grad_norm": 1.0238150269553674, "learning_rate": 2.4123772470753636e-06, "loss": 0.5906, "step": 10721 }, { "epoch": 1.37, "grad_norm": 0.7473902968138395, "learning_rate": 2.4114946064734845e-06, "loss": 0.4959, "step": 10722 }, { "epoch": 1.37, "grad_norm": 0.6653613205142198, "learning_rate": 2.4106120760560105e-06, "loss": 0.49, "step": 10723 }, { "epoch": 1.37, "grad_norm": 0.7572531889262955, "learning_rate": 2.4097296558605034e-06, "loss": 0.4649, "step": 10724 }, { "epoch": 1.37, "grad_norm": 0.5732860034865839, "learning_rate": 2.408847345924529e-06, "loss": 0.4335, "step": 10725 }, { "epoch": 1.37, "grad_norm": 0.5664822620728239, "learning_rate": 2.407965146285642e-06, "loss": 0.4193, "step": 10726 }, { "epoch": 1.37, "grad_norm": 0.6099452083868128, "learning_rate": 2.4070830569813954e-06, "loss": 0.4613, "step": 10727 }, { "epoch": 1.37, "grad_norm": 0.6531283756369385, "learning_rate": 2.406201078049339e-06, "loss": 0.4591, "step": 10728 }, { "epoch": 1.37, "grad_norm": 0.6071837708400514, "learning_rate": 2.4053192095270122e-06, "loss": 0.4863, "step": 10729 }, { "epoch": 1.37, "grad_norm": 0.726671450738999, "learning_rate": 2.4044374514519575e-06, "loss": 0.5041, "step": 10730 }, { "epoch": 1.37, "grad_norm": 0.6753116282711898, "learning_rate": 2.403555803861704e-06, "loss": 0.5246, "step": 10731 }, { "epoch": 1.37, "grad_norm": 0.7845088803810617, "learning_rate": 2.402674266793783e-06, "loss": 0.5073, "step": 10732 }, { "epoch": 1.37, "grad_norm": 0.5858019626031771, "learning_rate": 2.4017928402857206e-06, "loss": 0.4947, "step": 10733 }, { "epoch": 1.37, "grad_norm": 0.7594251192336049, "learning_rate": 2.400911524375032e-06, "loss": 0.5059, "step": 10734 }, { "epoch": 1.37, "grad_norm": 0.6436294907042814, "learning_rate": 2.400030319099237e-06, "loss": 0.4427, "step": 10735 }, { "epoch": 1.37, "grad_norm": 0.665587087036889, "learning_rate": 2.3991492244958403e-06, "loss": 0.4547, "step": 10736 }, { "epoch": 1.37, "grad_norm": 0.7935765655517953, "learning_rate": 2.398268240602352e-06, "loss": 0.4977, "step": 10737 }, { "epoch": 1.37, "grad_norm": 0.5973192112742913, "learning_rate": 2.3973873674562693e-06, "loss": 0.4672, "step": 10738 }, { "epoch": 1.37, "grad_norm": 0.6393252440505999, "learning_rate": 2.3965066050950913e-06, "loss": 0.4601, "step": 10739 }, { "epoch": 1.37, "grad_norm": 0.6370672488453737, "learning_rate": 2.3956259535563054e-06, "loss": 0.4552, "step": 10740 }, { "epoch": 1.37, "grad_norm": 0.6482178678751401, "learning_rate": 2.394745412877403e-06, "loss": 0.4786, "step": 10741 }, { "epoch": 1.37, "grad_norm": 0.6032793464429704, "learning_rate": 2.3938649830958606e-06, "loss": 0.5129, "step": 10742 }, { "epoch": 1.37, "grad_norm": 0.8323148011681242, "learning_rate": 2.3929846642491595e-06, "loss": 0.4937, "step": 10743 }, { "epoch": 1.37, "grad_norm": 0.5627431230471072, "learning_rate": 2.3921044563747685e-06, "loss": 0.428, "step": 10744 }, { "epoch": 1.37, "grad_norm": 0.6393055905578594, "learning_rate": 2.3912243595101574e-06, "loss": 0.4671, "step": 10745 }, { "epoch": 1.37, "grad_norm": 0.5901338716140343, "learning_rate": 2.3903443736927897e-06, "loss": 0.4859, "step": 10746 }, { "epoch": 1.37, "grad_norm": 0.7812220224675036, "learning_rate": 2.3894644989601236e-06, "loss": 0.526, "step": 10747 }, { "epoch": 1.37, "grad_norm": 0.5606310442516241, "learning_rate": 2.38858473534961e-06, "loss": 0.4667, "step": 10748 }, { "epoch": 1.37, "grad_norm": 0.693223459316275, "learning_rate": 2.3877050828987016e-06, "loss": 0.4727, "step": 10749 }, { "epoch": 1.37, "grad_norm": 0.6675015607182109, "learning_rate": 2.3868255416448377e-06, "loss": 0.5087, "step": 10750 }, { "epoch": 1.37, "grad_norm": 0.6886864857442677, "learning_rate": 2.385946111625462e-06, "loss": 0.46, "step": 10751 }, { "epoch": 1.37, "grad_norm": 0.6102865259761342, "learning_rate": 2.385066792878005e-06, "loss": 0.4672, "step": 10752 }, { "epoch": 1.37, "grad_norm": 0.7018802541894525, "learning_rate": 2.384187585439901e-06, "loss": 0.4813, "step": 10753 }, { "epoch": 1.37, "grad_norm": 0.6890841022637195, "learning_rate": 2.3833084893485705e-06, "loss": 0.4756, "step": 10754 }, { "epoch": 1.37, "grad_norm": 0.7642755706983619, "learning_rate": 2.382429504641437e-06, "loss": 0.4847, "step": 10755 }, { "epoch": 1.37, "grad_norm": 0.8206867294764637, "learning_rate": 2.381550631355914e-06, "loss": 0.469, "step": 10756 }, { "epoch": 1.37, "grad_norm": 2.058137301693994, "learning_rate": 2.3806718695294124e-06, "loss": 0.4359, "step": 10757 }, { "epoch": 1.37, "grad_norm": 0.7446157661237356, "learning_rate": 2.3797932191993396e-06, "loss": 0.4761, "step": 10758 }, { "epoch": 1.37, "grad_norm": 0.7050987456196054, "learning_rate": 2.3789146804030984e-06, "loss": 0.4839, "step": 10759 }, { "epoch": 1.37, "grad_norm": 0.7544500047765144, "learning_rate": 2.3780362531780815e-06, "loss": 0.509, "step": 10760 }, { "epoch": 1.37, "grad_norm": 0.8407991862856738, "learning_rate": 2.377157937561684e-06, "loss": 0.5663, "step": 10761 }, { "epoch": 1.37, "grad_norm": 0.7558720128445902, "learning_rate": 2.376279733591291e-06, "loss": 0.4997, "step": 10762 }, { "epoch": 1.37, "grad_norm": 0.6258939976241674, "learning_rate": 2.375401641304287e-06, "loss": 0.4836, "step": 10763 }, { "epoch": 1.37, "grad_norm": 0.7867419421900359, "learning_rate": 2.374523660738046e-06, "loss": 0.5363, "step": 10764 }, { "epoch": 1.37, "grad_norm": 0.7395039651205694, "learning_rate": 2.3736457919299446e-06, "loss": 0.4981, "step": 10765 }, { "epoch": 1.37, "grad_norm": 0.7405534995697715, "learning_rate": 2.3727680349173475e-06, "loss": 0.5776, "step": 10766 }, { "epoch": 1.37, "grad_norm": 0.7104559397257032, "learning_rate": 2.3718903897376216e-06, "loss": 0.4762, "step": 10767 }, { "epoch": 1.37, "grad_norm": 0.6542246828416545, "learning_rate": 2.371012856428122e-06, "loss": 0.4525, "step": 10768 }, { "epoch": 1.37, "grad_norm": 0.5609690670930079, "learning_rate": 2.370135435026206e-06, "loss": 0.4263, "step": 10769 }, { "epoch": 1.37, "grad_norm": 0.8051835397134139, "learning_rate": 2.3692581255692167e-06, "loss": 0.5298, "step": 10770 }, { "epoch": 1.37, "grad_norm": 0.7743788436099871, "learning_rate": 2.368380928094506e-06, "loss": 0.5792, "step": 10771 }, { "epoch": 1.37, "grad_norm": 0.6776847729356127, "learning_rate": 2.3675038426394078e-06, "loss": 0.5239, "step": 10772 }, { "epoch": 1.37, "grad_norm": 0.6080921980201661, "learning_rate": 2.3666268692412603e-06, "loss": 0.4805, "step": 10773 }, { "epoch": 1.37, "grad_norm": 0.6871087491999675, "learning_rate": 2.365750007937391e-06, "loss": 0.4787, "step": 10774 }, { "epoch": 1.37, "grad_norm": 0.7243648822500646, "learning_rate": 2.3648732587651268e-06, "loss": 0.5107, "step": 10775 }, { "epoch": 1.37, "grad_norm": 0.6034248568715802, "learning_rate": 2.3639966217617868e-06, "loss": 0.5041, "step": 10776 }, { "epoch": 1.37, "grad_norm": 1.0120335372650437, "learning_rate": 2.363120096964688e-06, "loss": 0.5738, "step": 10777 }, { "epoch": 1.37, "grad_norm": 0.7415451815977266, "learning_rate": 2.3622436844111384e-06, "loss": 0.556, "step": 10778 }, { "epoch": 1.37, "grad_norm": 0.8256111580997718, "learning_rate": 2.361367384138448e-06, "loss": 0.5658, "step": 10779 }, { "epoch": 1.37, "grad_norm": 0.719510323500556, "learning_rate": 2.360491196183915e-06, "loss": 0.5292, "step": 10780 }, { "epoch": 1.37, "grad_norm": 1.004692350210955, "learning_rate": 2.359615120584839e-06, "loss": 0.559, "step": 10781 }, { "epoch": 1.37, "grad_norm": 0.7024380001621336, "learning_rate": 2.3587391573785073e-06, "loss": 0.533, "step": 10782 }, { "epoch": 1.37, "grad_norm": 0.5796511462605038, "learning_rate": 2.35786330660221e-06, "loss": 0.4513, "step": 10783 }, { "epoch": 1.37, "grad_norm": 0.8258690913604891, "learning_rate": 2.3569875682932304e-06, "loss": 0.5415, "step": 10784 }, { "epoch": 1.37, "grad_norm": 0.6129396432400224, "learning_rate": 2.356111942488842e-06, "loss": 0.5035, "step": 10785 }, { "epoch": 1.37, "grad_norm": 0.7709876421865782, "learning_rate": 2.3552364292263215e-06, "loss": 0.6035, "step": 10786 }, { "epoch": 1.37, "grad_norm": 0.7091236168630716, "learning_rate": 2.3543610285429332e-06, "loss": 0.4916, "step": 10787 }, { "epoch": 1.37, "grad_norm": 0.5715637774864741, "learning_rate": 2.3534857404759414e-06, "loss": 0.478, "step": 10788 }, { "epoch": 1.37, "grad_norm": 0.6525523934717318, "learning_rate": 2.352610565062606e-06, "loss": 0.4162, "step": 10789 }, { "epoch": 1.37, "grad_norm": 0.6228413227067008, "learning_rate": 2.351735502340178e-06, "loss": 0.5088, "step": 10790 }, { "epoch": 1.37, "grad_norm": 0.729817446892084, "learning_rate": 2.350860552345908e-06, "loss": 0.5746, "step": 10791 }, { "epoch": 1.37, "grad_norm": 0.6946337814457026, "learning_rate": 2.349985715117038e-06, "loss": 0.5064, "step": 10792 }, { "epoch": 1.37, "grad_norm": 0.5995985235027571, "learning_rate": 2.3491109906908095e-06, "loss": 0.4355, "step": 10793 }, { "epoch": 1.38, "grad_norm": 0.6098114528755813, "learning_rate": 2.348236379104453e-06, "loss": 0.453, "step": 10794 }, { "epoch": 1.38, "grad_norm": 0.5293180168757409, "learning_rate": 2.3473618803951997e-06, "loss": 0.4282, "step": 10795 }, { "epoch": 1.38, "grad_norm": 0.6461010280129098, "learning_rate": 2.3464874946002762e-06, "loss": 0.4835, "step": 10796 }, { "epoch": 1.38, "grad_norm": 0.604742512563059, "learning_rate": 2.3456132217568994e-06, "loss": 0.4334, "step": 10797 }, { "epoch": 1.38, "grad_norm": 0.6728208997366567, "learning_rate": 2.3447390619022873e-06, "loss": 0.4376, "step": 10798 }, { "epoch": 1.38, "grad_norm": 0.6260716571252182, "learning_rate": 2.3438650150736465e-06, "loss": 0.438, "step": 10799 }, { "epoch": 1.38, "grad_norm": 0.6411162499271806, "learning_rate": 2.342991081308186e-06, "loss": 0.5091, "step": 10800 }, { "epoch": 1.38, "grad_norm": 0.7894618104541057, "learning_rate": 2.342117260643103e-06, "loss": 0.4988, "step": 10801 }, { "epoch": 1.38, "grad_norm": 0.7268751297462787, "learning_rate": 2.3412435531155963e-06, "loss": 0.4491, "step": 10802 }, { "epoch": 1.38, "grad_norm": 0.8370397054839325, "learning_rate": 2.3403699587628537e-06, "loss": 0.507, "step": 10803 }, { "epoch": 1.38, "grad_norm": 0.6221575888969254, "learning_rate": 2.339496477622065e-06, "loss": 0.4539, "step": 10804 }, { "epoch": 1.38, "grad_norm": 0.7669903378821644, "learning_rate": 2.3386231097304073e-06, "loss": 0.5337, "step": 10805 }, { "epoch": 1.38, "grad_norm": 0.7556400172219717, "learning_rate": 2.3377498551250612e-06, "loss": 0.5698, "step": 10806 }, { "epoch": 1.38, "grad_norm": 0.5907499487355397, "learning_rate": 2.3368767138431926e-06, "loss": 0.4793, "step": 10807 }, { "epoch": 1.38, "grad_norm": 0.9718417545714607, "learning_rate": 2.3360036859219758e-06, "loss": 0.5496, "step": 10808 }, { "epoch": 1.38, "grad_norm": 0.80538505610934, "learning_rate": 2.3351307713985678e-06, "loss": 0.4847, "step": 10809 }, { "epoch": 1.38, "grad_norm": 0.6634063433384094, "learning_rate": 2.334257970310129e-06, "loss": 0.4551, "step": 10810 }, { "epoch": 1.38, "grad_norm": 0.7258577887381235, "learning_rate": 2.3333852826938082e-06, "loss": 0.5441, "step": 10811 }, { "epoch": 1.38, "grad_norm": 0.6722498850302644, "learning_rate": 2.3325127085867554e-06, "loss": 0.4432, "step": 10812 }, { "epoch": 1.38, "grad_norm": 0.7829308135547088, "learning_rate": 2.331640248026112e-06, "loss": 0.4876, "step": 10813 }, { "epoch": 1.38, "grad_norm": 0.7334023455009091, "learning_rate": 2.3307679010490175e-06, "loss": 0.5017, "step": 10814 }, { "epoch": 1.38, "grad_norm": 0.6371490893324795, "learning_rate": 2.3298956676926023e-06, "loss": 0.4521, "step": 10815 }, { "epoch": 1.38, "grad_norm": 0.6348561939785046, "learning_rate": 2.329023547993998e-06, "loss": 0.453, "step": 10816 }, { "epoch": 1.38, "grad_norm": 0.7845462444401949, "learning_rate": 2.3281515419903233e-06, "loss": 0.4941, "step": 10817 }, { "epoch": 1.38, "grad_norm": 0.7707273584196197, "learning_rate": 2.3272796497187023e-06, "loss": 0.5087, "step": 10818 }, { "epoch": 1.38, "grad_norm": 0.691895113348418, "learning_rate": 2.3264078712162413e-06, "loss": 0.4724, "step": 10819 }, { "epoch": 1.38, "grad_norm": 0.6067262766921584, "learning_rate": 2.325536206520058e-06, "loss": 0.442, "step": 10820 }, { "epoch": 1.38, "grad_norm": 0.6007288267040728, "learning_rate": 2.32466465566725e-06, "loss": 0.4777, "step": 10821 }, { "epoch": 1.38, "grad_norm": 0.8321389327862625, "learning_rate": 2.32379321869492e-06, "loss": 0.5049, "step": 10822 }, { "epoch": 1.38, "grad_norm": 0.7320317635548815, "learning_rate": 2.3229218956401593e-06, "loss": 0.5155, "step": 10823 }, { "epoch": 1.38, "grad_norm": 0.6482921480648263, "learning_rate": 2.3220506865400604e-06, "loss": 0.4684, "step": 10824 }, { "epoch": 1.38, "grad_norm": 0.8111920894798678, "learning_rate": 2.321179591431705e-06, "loss": 0.4723, "step": 10825 }, { "epoch": 1.38, "grad_norm": 0.6701617418798336, "learning_rate": 2.3203086103521753e-06, "loss": 0.4953, "step": 10826 }, { "epoch": 1.38, "grad_norm": 0.8428668487849036, "learning_rate": 2.3194377433385434e-06, "loss": 0.5295, "step": 10827 }, { "epoch": 1.38, "grad_norm": 0.727807295443101, "learning_rate": 2.318566990427883e-06, "loss": 0.5495, "step": 10828 }, { "epoch": 1.38, "grad_norm": 0.6395711581650799, "learning_rate": 2.3176963516572546e-06, "loss": 0.4289, "step": 10829 }, { "epoch": 1.38, "grad_norm": 0.6438211565663589, "learning_rate": 2.3168258270637233e-06, "loss": 0.4228, "step": 10830 }, { "epoch": 1.38, "grad_norm": 0.618310023751601, "learning_rate": 2.3159554166843408e-06, "loss": 0.4546, "step": 10831 }, { "epoch": 1.38, "grad_norm": 0.7663220610981202, "learning_rate": 2.315085120556159e-06, "loss": 0.5122, "step": 10832 }, { "epoch": 1.38, "grad_norm": 0.5277908735189812, "learning_rate": 2.314214938716224e-06, "loss": 0.4127, "step": 10833 }, { "epoch": 1.38, "grad_norm": 0.6815308361486874, "learning_rate": 2.3133448712015784e-06, "loss": 0.4891, "step": 10834 }, { "epoch": 1.38, "grad_norm": 0.6643055000468925, "learning_rate": 2.312474918049254e-06, "loss": 0.4648, "step": 10835 }, { "epoch": 1.38, "grad_norm": 0.7628619037188938, "learning_rate": 2.311605079296286e-06, "loss": 0.5443, "step": 10836 }, { "epoch": 1.38, "grad_norm": 0.697087369735636, "learning_rate": 2.310735354979698e-06, "loss": 0.4966, "step": 10837 }, { "epoch": 1.38, "grad_norm": 0.63360614192725, "learning_rate": 2.309865745136513e-06, "loss": 0.4964, "step": 10838 }, { "epoch": 1.38, "grad_norm": 0.5240209848865982, "learning_rate": 2.3089962498037454e-06, "loss": 0.386, "step": 10839 }, { "epoch": 1.38, "grad_norm": 0.784939710411291, "learning_rate": 2.3081268690184106e-06, "loss": 0.4941, "step": 10840 }, { "epoch": 1.38, "grad_norm": 0.8528359232804006, "learning_rate": 2.307257602817511e-06, "loss": 0.5312, "step": 10841 }, { "epoch": 1.38, "grad_norm": 0.8980076187595706, "learning_rate": 2.306388451238052e-06, "loss": 0.5149, "step": 10842 }, { "epoch": 1.38, "grad_norm": 0.6039802533407461, "learning_rate": 2.3055194143170268e-06, "loss": 0.4484, "step": 10843 }, { "epoch": 1.38, "grad_norm": 0.673218966898378, "learning_rate": 2.30465049209143e-06, "loss": 0.5021, "step": 10844 }, { "epoch": 1.38, "grad_norm": 0.71350439495478, "learning_rate": 2.3037816845982512e-06, "loss": 0.5002, "step": 10845 }, { "epoch": 1.38, "grad_norm": 0.6059695953100277, "learning_rate": 2.302912991874468e-06, "loss": 0.4107, "step": 10846 }, { "epoch": 1.38, "grad_norm": 0.629674748262165, "learning_rate": 2.302044413957062e-06, "loss": 0.5115, "step": 10847 }, { "epoch": 1.38, "grad_norm": 0.6957572241525941, "learning_rate": 2.3011759508830017e-06, "loss": 0.5358, "step": 10848 }, { "epoch": 1.38, "grad_norm": 0.7079104543439373, "learning_rate": 2.300307602689259e-06, "loss": 0.5064, "step": 10849 }, { "epoch": 1.38, "grad_norm": 0.6748084773974228, "learning_rate": 2.2994393694127925e-06, "loss": 0.4545, "step": 10850 }, { "epoch": 1.38, "grad_norm": 0.8198493326666778, "learning_rate": 2.298571251090563e-06, "loss": 0.5341, "step": 10851 }, { "epoch": 1.38, "grad_norm": 0.8354673963447915, "learning_rate": 2.297703247759524e-06, "loss": 0.485, "step": 10852 }, { "epoch": 1.38, "grad_norm": 0.609875454634327, "learning_rate": 2.2968353594566204e-06, "loss": 0.4674, "step": 10853 }, { "epoch": 1.38, "grad_norm": 0.8023069330239143, "learning_rate": 2.2959675862188e-06, "loss": 0.5317, "step": 10854 }, { "epoch": 1.38, "grad_norm": 0.646843375110653, "learning_rate": 2.2950999280829965e-06, "loss": 0.4665, "step": 10855 }, { "epoch": 1.38, "grad_norm": 0.8594025282298801, "learning_rate": 2.294232385086146e-06, "loss": 0.5593, "step": 10856 }, { "epoch": 1.38, "grad_norm": 0.6386188645289982, "learning_rate": 2.2933649572651774e-06, "loss": 0.4471, "step": 10857 }, { "epoch": 1.38, "grad_norm": 0.595628600036395, "learning_rate": 2.2924976446570117e-06, "loss": 0.434, "step": 10858 }, { "epoch": 1.38, "grad_norm": 0.6302137959335479, "learning_rate": 2.2916304472985717e-06, "loss": 0.4214, "step": 10859 }, { "epoch": 1.38, "grad_norm": 0.7256589824548828, "learning_rate": 2.2907633652267665e-06, "loss": 0.4948, "step": 10860 }, { "epoch": 1.38, "grad_norm": 0.5784285660717894, "learning_rate": 2.2898963984785094e-06, "loss": 0.4217, "step": 10861 }, { "epoch": 1.38, "grad_norm": 0.6399792271673397, "learning_rate": 2.2890295470907004e-06, "loss": 0.5031, "step": 10862 }, { "epoch": 1.38, "grad_norm": 0.8377256946518645, "learning_rate": 2.288162811100243e-06, "loss": 0.5681, "step": 10863 }, { "epoch": 1.38, "grad_norm": 0.7997655840390412, "learning_rate": 2.287296190544026e-06, "loss": 0.5771, "step": 10864 }, { "epoch": 1.38, "grad_norm": 0.6840156624585454, "learning_rate": 2.2864296854589447e-06, "loss": 0.5083, "step": 10865 }, { "epoch": 1.38, "grad_norm": 0.7734226611635323, "learning_rate": 2.2855632958818775e-06, "loss": 0.5284, "step": 10866 }, { "epoch": 1.38, "grad_norm": 0.7179434223053618, "learning_rate": 2.2846970218497087e-06, "loss": 0.5535, "step": 10867 }, { "epoch": 1.38, "grad_norm": 0.853643224698523, "learning_rate": 2.2838308633993094e-06, "loss": 0.562, "step": 10868 }, { "epoch": 1.38, "grad_norm": 0.7325780615361955, "learning_rate": 2.2829648205675505e-06, "loss": 0.5552, "step": 10869 }, { "epoch": 1.38, "grad_norm": 0.7512348455354795, "learning_rate": 2.282098893391297e-06, "loss": 0.5048, "step": 10870 }, { "epoch": 1.38, "grad_norm": 0.6871867274601527, "learning_rate": 2.2812330819074098e-06, "loss": 0.4601, "step": 10871 }, { "epoch": 1.39, "grad_norm": 0.6898907884307547, "learning_rate": 2.2803673861527402e-06, "loss": 0.4737, "step": 10872 }, { "epoch": 1.39, "grad_norm": 0.7204682129670187, "learning_rate": 2.2795018061641426e-06, "loss": 0.4918, "step": 10873 }, { "epoch": 1.39, "grad_norm": 0.7429270747384472, "learning_rate": 2.278636341978458e-06, "loss": 0.5492, "step": 10874 }, { "epoch": 1.39, "grad_norm": 0.7478198372537292, "learning_rate": 2.27777099363253e-06, "loss": 0.4986, "step": 10875 }, { "epoch": 1.39, "grad_norm": 0.6253559315027629, "learning_rate": 2.2769057611631896e-06, "loss": 0.4849, "step": 10876 }, { "epoch": 1.39, "grad_norm": 0.7917345277229931, "learning_rate": 2.276040644607271e-06, "loss": 0.539, "step": 10877 }, { "epoch": 1.39, "grad_norm": 1.0129410880127359, "learning_rate": 2.275175644001596e-06, "loss": 0.529, "step": 10878 }, { "epoch": 1.39, "grad_norm": 0.6416267711648092, "learning_rate": 2.2743107593829882e-06, "loss": 0.5089, "step": 10879 }, { "epoch": 1.39, "grad_norm": 0.7531020512628711, "learning_rate": 2.2734459907882593e-06, "loss": 0.5236, "step": 10880 }, { "epoch": 1.39, "grad_norm": 0.6191266285390059, "learning_rate": 2.272581338254222e-06, "loss": 0.4809, "step": 10881 }, { "epoch": 1.39, "grad_norm": 0.7061625663095266, "learning_rate": 2.2717168018176816e-06, "loss": 0.4906, "step": 10882 }, { "epoch": 1.39, "grad_norm": 0.6615318409787045, "learning_rate": 2.270852381515441e-06, "loss": 0.4588, "step": 10883 }, { "epoch": 1.39, "grad_norm": 0.5958957323794096, "learning_rate": 2.269988077384291e-06, "loss": 0.4467, "step": 10884 }, { "epoch": 1.39, "grad_norm": 0.7140301508599692, "learning_rate": 2.2691238894610262e-06, "loss": 0.5212, "step": 10885 }, { "epoch": 1.39, "grad_norm": 0.70847909896908, "learning_rate": 2.2682598177824295e-06, "loss": 0.4742, "step": 10886 }, { "epoch": 1.39, "grad_norm": 0.7408743619337694, "learning_rate": 2.2673958623852845e-06, "loss": 0.5357, "step": 10887 }, { "epoch": 1.39, "grad_norm": 0.624318605854862, "learning_rate": 2.266532023306364e-06, "loss": 0.4861, "step": 10888 }, { "epoch": 1.39, "grad_norm": 0.6330453646878533, "learning_rate": 2.2656683005824414e-06, "loss": 0.4498, "step": 10889 }, { "epoch": 1.39, "grad_norm": 0.830984947084523, "learning_rate": 2.26480469425028e-06, "loss": 0.5654, "step": 10890 }, { "epoch": 1.39, "grad_norm": 1.1469468777747078, "learning_rate": 2.263941204346645e-06, "loss": 0.5465, "step": 10891 }, { "epoch": 1.39, "grad_norm": 0.7971891625506593, "learning_rate": 2.263077830908287e-06, "loss": 0.5525, "step": 10892 }, { "epoch": 1.39, "grad_norm": 0.7687905450681011, "learning_rate": 2.2622145739719618e-06, "loss": 0.5422, "step": 10893 }, { "epoch": 1.39, "grad_norm": 0.7859554968939656, "learning_rate": 2.26135143357441e-06, "loss": 0.5408, "step": 10894 }, { "epoch": 1.39, "grad_norm": 0.7897704926868802, "learning_rate": 2.26048840975238e-06, "loss": 0.5684, "step": 10895 }, { "epoch": 1.39, "grad_norm": 0.6984672281073324, "learning_rate": 2.2596255025426016e-06, "loss": 0.557, "step": 10896 }, { "epoch": 1.39, "grad_norm": 0.7968758589073045, "learning_rate": 2.2587627119818114e-06, "loss": 0.5242, "step": 10897 }, { "epoch": 1.39, "grad_norm": 0.7185412552560613, "learning_rate": 2.25790003810673e-06, "loss": 0.4765, "step": 10898 }, { "epoch": 1.39, "grad_norm": 0.6079780037116617, "learning_rate": 2.257037480954084e-06, "loss": 0.4408, "step": 10899 }, { "epoch": 1.39, "grad_norm": 0.6468975121598799, "learning_rate": 2.2561750405605852e-06, "loss": 0.4928, "step": 10900 }, { "epoch": 1.39, "grad_norm": 0.7641646389324386, "learning_rate": 2.2553127169629483e-06, "loss": 0.4848, "step": 10901 }, { "epoch": 1.39, "grad_norm": 0.6023575145887837, "learning_rate": 2.2544505101978775e-06, "loss": 0.4138, "step": 10902 }, { "epoch": 1.39, "grad_norm": 0.6652981432363783, "learning_rate": 2.253588420302076e-06, "loss": 0.4721, "step": 10903 }, { "epoch": 1.39, "grad_norm": 0.5903932658563311, "learning_rate": 2.2527264473122375e-06, "loss": 0.4271, "step": 10904 }, { "epoch": 1.39, "grad_norm": 0.5738573114569012, "learning_rate": 2.2518645912650565e-06, "loss": 0.4052, "step": 10905 }, { "epoch": 1.39, "grad_norm": 0.6396189298051217, "learning_rate": 2.251002852197217e-06, "loss": 0.4674, "step": 10906 }, { "epoch": 1.39, "grad_norm": 0.8670799584371012, "learning_rate": 2.2501412301454014e-06, "loss": 0.5947, "step": 10907 }, { "epoch": 1.39, "grad_norm": 0.8304689547212774, "learning_rate": 2.249279725146288e-06, "loss": 0.4901, "step": 10908 }, { "epoch": 1.39, "grad_norm": 0.776941287475717, "learning_rate": 2.248418337236545e-06, "loss": 0.4466, "step": 10909 }, { "epoch": 1.39, "grad_norm": 0.6066677428957276, "learning_rate": 2.247557066452842e-06, "loss": 0.4477, "step": 10910 }, { "epoch": 1.39, "grad_norm": 0.5851247005247016, "learning_rate": 2.246695912831837e-06, "loss": 0.4324, "step": 10911 }, { "epoch": 1.39, "grad_norm": 0.6741998445693816, "learning_rate": 2.2458348764101893e-06, "loss": 0.5196, "step": 10912 }, { "epoch": 1.39, "grad_norm": 0.6836705911603268, "learning_rate": 2.2449739572245522e-06, "loss": 0.4922, "step": 10913 }, { "epoch": 1.39, "grad_norm": 0.5734677035468869, "learning_rate": 2.244113155311567e-06, "loss": 0.4967, "step": 10914 }, { "epoch": 1.39, "grad_norm": 0.6037662181064927, "learning_rate": 2.2432524707078806e-06, "loss": 0.4282, "step": 10915 }, { "epoch": 1.39, "grad_norm": 0.6634103154685594, "learning_rate": 2.242391903450126e-06, "loss": 0.511, "step": 10916 }, { "epoch": 1.39, "grad_norm": 0.6992329402401665, "learning_rate": 2.2415314535749364e-06, "loss": 0.5075, "step": 10917 }, { "epoch": 1.39, "grad_norm": 0.5164375626469455, "learning_rate": 2.2406711211189376e-06, "loss": 0.4375, "step": 10918 }, { "epoch": 1.39, "grad_norm": 0.7710943939776963, "learning_rate": 2.2398109061187507e-06, "loss": 0.5269, "step": 10919 }, { "epoch": 1.39, "grad_norm": 0.7076536057752935, "learning_rate": 2.2389508086109953e-06, "loss": 0.5515, "step": 10920 }, { "epoch": 1.39, "grad_norm": 0.5653953532490982, "learning_rate": 2.2380908286322796e-06, "loss": 0.498, "step": 10921 }, { "epoch": 1.39, "grad_norm": 0.7356905916586278, "learning_rate": 2.237230966219213e-06, "loss": 0.4974, "step": 10922 }, { "epoch": 1.39, "grad_norm": 0.6122952244606756, "learning_rate": 2.236371221408393e-06, "loss": 0.4574, "step": 10923 }, { "epoch": 1.39, "grad_norm": 1.6170098335556904, "learning_rate": 2.2355115942364212e-06, "loss": 0.5068, "step": 10924 }, { "epoch": 1.39, "grad_norm": 0.716916126601903, "learning_rate": 2.2346520847398856e-06, "loss": 0.5341, "step": 10925 }, { "epoch": 1.39, "grad_norm": 0.7358376455793131, "learning_rate": 2.233792692955375e-06, "loss": 0.4922, "step": 10926 }, { "epoch": 1.39, "grad_norm": 0.7445956345088092, "learning_rate": 2.2329334189194684e-06, "loss": 0.484, "step": 10927 }, { "epoch": 1.39, "grad_norm": 0.7791986312983723, "learning_rate": 2.232074262668746e-06, "loss": 0.5081, "step": 10928 }, { "epoch": 1.39, "grad_norm": 0.6554900309954337, "learning_rate": 2.2312152242397748e-06, "loss": 0.5061, "step": 10929 }, { "epoch": 1.39, "grad_norm": 0.9264581091645545, "learning_rate": 2.2303563036691262e-06, "loss": 0.5572, "step": 10930 }, { "epoch": 1.39, "grad_norm": 0.7857140826550714, "learning_rate": 2.229497500993355e-06, "loss": 0.4902, "step": 10931 }, { "epoch": 1.39, "grad_norm": 0.6708662137603609, "learning_rate": 2.228638816249026e-06, "loss": 0.4536, "step": 10932 }, { "epoch": 1.39, "grad_norm": 0.7342422332790777, "learning_rate": 2.227780249472684e-06, "loss": 0.4723, "step": 10933 }, { "epoch": 1.39, "grad_norm": 0.5941908653503654, "learning_rate": 2.226921800700881e-06, "loss": 0.4013, "step": 10934 }, { "epoch": 1.39, "grad_norm": 0.5758168265318097, "learning_rate": 2.2260634699701527e-06, "loss": 0.4454, "step": 10935 }, { "epoch": 1.39, "grad_norm": 0.6809199870318094, "learning_rate": 2.225205257317041e-06, "loss": 0.4624, "step": 10936 }, { "epoch": 1.39, "grad_norm": 0.7105686784561956, "learning_rate": 2.2243471627780716e-06, "loss": 0.491, "step": 10937 }, { "epoch": 1.39, "grad_norm": 0.6869496746710227, "learning_rate": 2.223489186389776e-06, "loss": 0.4574, "step": 10938 }, { "epoch": 1.39, "grad_norm": 0.6150084343203703, "learning_rate": 2.2226313281886718e-06, "loss": 0.4991, "step": 10939 }, { "epoch": 1.39, "grad_norm": 0.6877291962032914, "learning_rate": 2.2217735882112784e-06, "loss": 0.4972, "step": 10940 }, { "epoch": 1.39, "grad_norm": 0.8457120854417003, "learning_rate": 2.2209159664941034e-06, "loss": 0.45, "step": 10941 }, { "epoch": 1.39, "grad_norm": 0.6081577001832593, "learning_rate": 2.220058463073657e-06, "loss": 0.4321, "step": 10942 }, { "epoch": 1.39, "grad_norm": 0.624573766215268, "learning_rate": 2.219201077986435e-06, "loss": 0.4753, "step": 10943 }, { "epoch": 1.39, "grad_norm": 0.7625434569000092, "learning_rate": 2.21834381126894e-06, "loss": 0.5031, "step": 10944 }, { "epoch": 1.39, "grad_norm": 0.7393329927868117, "learning_rate": 2.2174866629576585e-06, "loss": 0.5064, "step": 10945 }, { "epoch": 1.39, "grad_norm": 0.6317837046751107, "learning_rate": 2.2166296330890802e-06, "loss": 0.4425, "step": 10946 }, { "epoch": 1.39, "grad_norm": 0.6471990765402897, "learning_rate": 2.2157727216996823e-06, "loss": 0.4404, "step": 10947 }, { "epoch": 1.39, "grad_norm": 0.6592984573783606, "learning_rate": 2.2149159288259435e-06, "loss": 0.4622, "step": 10948 }, { "epoch": 1.39, "grad_norm": 0.541674743752519, "learning_rate": 2.2140592545043326e-06, "loss": 0.4044, "step": 10949 }, { "epoch": 1.39, "grad_norm": 0.6076175752250428, "learning_rate": 2.2132026987713185e-06, "loss": 0.457, "step": 10950 }, { "epoch": 1.4, "grad_norm": 0.5562338123275646, "learning_rate": 2.2123462616633585e-06, "loss": 0.451, "step": 10951 }, { "epoch": 1.4, "grad_norm": 1.6705609066312985, "learning_rate": 2.211489943216911e-06, "loss": 0.5134, "step": 10952 }, { "epoch": 1.4, "grad_norm": 0.7642569088347329, "learning_rate": 2.210633743468425e-06, "loss": 0.5315, "step": 10953 }, { "epoch": 1.4, "grad_norm": 0.841355749038376, "learning_rate": 2.209777662454348e-06, "loss": 0.5276, "step": 10954 }, { "epoch": 1.4, "grad_norm": 1.0077373634766211, "learning_rate": 2.2089217002111184e-06, "loss": 0.5449, "step": 10955 }, { "epoch": 1.4, "grad_norm": 0.584543446062315, "learning_rate": 2.2080658567751727e-06, "loss": 0.5077, "step": 10956 }, { "epoch": 1.4, "grad_norm": 0.7767332844256136, "learning_rate": 2.207210132182942e-06, "loss": 0.5309, "step": 10957 }, { "epoch": 1.4, "grad_norm": 0.6926558720085354, "learning_rate": 2.2063545264708535e-06, "loss": 0.479, "step": 10958 }, { "epoch": 1.4, "grad_norm": 0.6884824516269165, "learning_rate": 2.2054990396753236e-06, "loss": 0.4546, "step": 10959 }, { "epoch": 1.4, "grad_norm": 0.6168371490437435, "learning_rate": 2.204643671832771e-06, "loss": 0.4368, "step": 10960 }, { "epoch": 1.4, "grad_norm": 0.7702975252598642, "learning_rate": 2.203788422979603e-06, "loss": 0.5454, "step": 10961 }, { "epoch": 1.4, "grad_norm": 0.8119574575472767, "learning_rate": 2.2029332931522284e-06, "loss": 0.5198, "step": 10962 }, { "epoch": 1.4, "grad_norm": 0.9934127618301879, "learning_rate": 2.202078282387044e-06, "loss": 0.4838, "step": 10963 }, { "epoch": 1.4, "grad_norm": 0.6712955357044467, "learning_rate": 2.2012233907204474e-06, "loss": 0.4438, "step": 10964 }, { "epoch": 1.4, "grad_norm": 0.5654238662769213, "learning_rate": 2.2003686181888257e-06, "loss": 0.4054, "step": 10965 }, { "epoch": 1.4, "grad_norm": 0.60106904253335, "learning_rate": 2.1995139648285673e-06, "loss": 0.4268, "step": 10966 }, { "epoch": 1.4, "grad_norm": 0.6669297735871007, "learning_rate": 2.198659430676049e-06, "loss": 0.4651, "step": 10967 }, { "epoch": 1.4, "grad_norm": 0.7193298588316885, "learning_rate": 2.1978050157676465e-06, "loss": 0.4746, "step": 10968 }, { "epoch": 1.4, "grad_norm": 0.6321282919246608, "learning_rate": 2.1969507201397323e-06, "loss": 0.4319, "step": 10969 }, { "epoch": 1.4, "grad_norm": 1.8279561336571408, "learning_rate": 2.196096543828666e-06, "loss": 0.5255, "step": 10970 }, { "epoch": 1.4, "grad_norm": 0.8142394644349056, "learning_rate": 2.195242486870812e-06, "loss": 0.4659, "step": 10971 }, { "epoch": 1.4, "grad_norm": 0.7207852920771451, "learning_rate": 2.194388549302521e-06, "loss": 0.4924, "step": 10972 }, { "epoch": 1.4, "grad_norm": 0.6702533859239594, "learning_rate": 2.193534731160144e-06, "loss": 0.5134, "step": 10973 }, { "epoch": 1.4, "grad_norm": 0.7363740120200745, "learning_rate": 2.1926810324800275e-06, "loss": 0.4671, "step": 10974 }, { "epoch": 1.4, "grad_norm": 0.6283988046886836, "learning_rate": 2.191827453298506e-06, "loss": 0.4751, "step": 10975 }, { "epoch": 1.4, "grad_norm": 0.6227413034732902, "learning_rate": 2.190973993651918e-06, "loss": 0.4862, "step": 10976 }, { "epoch": 1.4, "grad_norm": 0.5495609392943195, "learning_rate": 2.1901206535765896e-06, "loss": 0.4458, "step": 10977 }, { "epoch": 1.4, "grad_norm": 0.6314091810114084, "learning_rate": 2.189267433108847e-06, "loss": 0.4807, "step": 10978 }, { "epoch": 1.4, "grad_norm": 0.7020886683319169, "learning_rate": 2.1884143322850067e-06, "loss": 0.4894, "step": 10979 }, { "epoch": 1.4, "grad_norm": 0.6301172822728136, "learning_rate": 2.1875613511413835e-06, "loss": 0.4169, "step": 10980 }, { "epoch": 1.4, "grad_norm": 0.6803946015984277, "learning_rate": 2.1867084897142876e-06, "loss": 0.5174, "step": 10981 }, { "epoch": 1.4, "grad_norm": 0.8329509918098765, "learning_rate": 2.18585574804002e-06, "loss": 0.5136, "step": 10982 }, { "epoch": 1.4, "grad_norm": 0.6443847846269465, "learning_rate": 2.1850031261548825e-06, "loss": 0.4595, "step": 10983 }, { "epoch": 1.4, "grad_norm": 0.5957642063948049, "learning_rate": 2.1841506240951644e-06, "loss": 0.4559, "step": 10984 }, { "epoch": 1.4, "grad_norm": 0.7878583552992771, "learning_rate": 2.183298241897158e-06, "loss": 0.4749, "step": 10985 }, { "epoch": 1.4, "grad_norm": 0.6979600090124072, "learning_rate": 2.1824459795971426e-06, "loss": 0.4922, "step": 10986 }, { "epoch": 1.4, "grad_norm": 0.8209454940473095, "learning_rate": 2.1815938372314e-06, "loss": 0.5243, "step": 10987 }, { "epoch": 1.4, "grad_norm": 0.6023625779386415, "learning_rate": 2.1807418148362e-06, "loss": 0.4576, "step": 10988 }, { "epoch": 1.4, "grad_norm": 0.615940588367025, "learning_rate": 2.1798899124478135e-06, "loss": 0.4706, "step": 10989 }, { "epoch": 1.4, "grad_norm": 0.61390045257212, "learning_rate": 2.1790381301024993e-06, "loss": 0.4279, "step": 10990 }, { "epoch": 1.4, "grad_norm": 0.555138987211361, "learning_rate": 2.17818646783652e-06, "loss": 0.4089, "step": 10991 }, { "epoch": 1.4, "grad_norm": 0.5683507604892607, "learning_rate": 2.177334925686123e-06, "loss": 0.499, "step": 10992 }, { "epoch": 1.4, "grad_norm": 0.7343151709091383, "learning_rate": 2.1764835036875585e-06, "loss": 0.4961, "step": 10993 }, { "epoch": 1.4, "grad_norm": 0.7442754463906057, "learning_rate": 2.1756322018770685e-06, "loss": 0.481, "step": 10994 }, { "epoch": 1.4, "grad_norm": 0.5611698795914328, "learning_rate": 2.1747810202908924e-06, "loss": 0.4069, "step": 10995 }, { "epoch": 1.4, "grad_norm": 0.6391799824310542, "learning_rate": 2.1739299589652585e-06, "loss": 0.4793, "step": 10996 }, { "epoch": 1.4, "grad_norm": 1.0105235284549798, "learning_rate": 2.173079017936397e-06, "loss": 0.5128, "step": 10997 }, { "epoch": 1.4, "grad_norm": 0.6317468501220947, "learning_rate": 2.1722281972405263e-06, "loss": 0.4406, "step": 10998 }, { "epoch": 1.4, "grad_norm": 0.5781795300100511, "learning_rate": 2.171377496913867e-06, "loss": 0.4915, "step": 10999 }, { "epoch": 1.4, "grad_norm": 0.8039107377411533, "learning_rate": 2.1705269169926265e-06, "loss": 0.5057, "step": 11000 }, { "epoch": 1.4, "grad_norm": 0.647062086882824, "learning_rate": 2.1696764575130155e-06, "loss": 0.4705, "step": 11001 }, { "epoch": 1.4, "grad_norm": 0.6032750648057195, "learning_rate": 2.1688261185112314e-06, "loss": 0.5155, "step": 11002 }, { "epoch": 1.4, "grad_norm": 0.8725981161136266, "learning_rate": 2.167975900023474e-06, "loss": 0.5371, "step": 11003 }, { "epoch": 1.4, "grad_norm": 0.7612226804291636, "learning_rate": 2.167125802085931e-06, "loss": 0.5239, "step": 11004 }, { "epoch": 1.4, "grad_norm": 0.7044912290668476, "learning_rate": 2.16627582473479e-06, "loss": 0.5128, "step": 11005 }, { "epoch": 1.4, "grad_norm": 0.6814329210257032, "learning_rate": 2.165425968006232e-06, "loss": 0.4399, "step": 11006 }, { "epoch": 1.4, "grad_norm": 0.6221781101872258, "learning_rate": 2.1645762319364346e-06, "loss": 0.4438, "step": 11007 }, { "epoch": 1.4, "grad_norm": 0.7546992385737619, "learning_rate": 2.163726616561564e-06, "loss": 0.4564, "step": 11008 }, { "epoch": 1.4, "grad_norm": 0.6794415969231482, "learning_rate": 2.16287712191779e-06, "loss": 0.4865, "step": 11009 }, { "epoch": 1.4, "grad_norm": 0.6438880633732903, "learning_rate": 2.1620277480412684e-06, "loss": 0.473, "step": 11010 }, { "epoch": 1.4, "grad_norm": 0.7669935149515065, "learning_rate": 2.1611784949681585e-06, "loss": 0.5343, "step": 11011 }, { "epoch": 1.4, "grad_norm": 0.8208067960589795, "learning_rate": 2.1603293627346074e-06, "loss": 0.5343, "step": 11012 }, { "epoch": 1.4, "grad_norm": 0.5818989720137931, "learning_rate": 2.1594803513767626e-06, "loss": 0.4405, "step": 11013 }, { "epoch": 1.4, "grad_norm": 0.7124391133639624, "learning_rate": 2.15863146093076e-06, "loss": 0.4811, "step": 11014 }, { "epoch": 1.4, "grad_norm": 0.6852734525096708, "learning_rate": 2.157782691432739e-06, "loss": 0.5071, "step": 11015 }, { "epoch": 1.4, "grad_norm": 0.6114566431044908, "learning_rate": 2.156934042918824e-06, "loss": 0.4313, "step": 11016 }, { "epoch": 1.4, "grad_norm": 0.6343984270692141, "learning_rate": 2.1560855154251433e-06, "loss": 0.4384, "step": 11017 }, { "epoch": 1.4, "grad_norm": 0.8647573277877423, "learning_rate": 2.155237108987811e-06, "loss": 0.4109, "step": 11018 }, { "epoch": 1.4, "grad_norm": 0.6361098323114568, "learning_rate": 2.1543888236429483e-06, "loss": 0.4358, "step": 11019 }, { "epoch": 1.4, "grad_norm": 0.7087256061379335, "learning_rate": 2.1535406594266577e-06, "loss": 0.4257, "step": 11020 }, { "epoch": 1.4, "grad_norm": 0.8394145324181174, "learning_rate": 2.1526926163750476e-06, "loss": 0.4702, "step": 11021 }, { "epoch": 1.4, "grad_norm": 0.6080438234027822, "learning_rate": 2.151844694524212e-06, "loss": 0.4134, "step": 11022 }, { "epoch": 1.4, "grad_norm": 0.6440188023966902, "learning_rate": 2.150996893910248e-06, "loss": 0.4978, "step": 11023 }, { "epoch": 1.4, "grad_norm": 0.8782516481647507, "learning_rate": 2.150149214569241e-06, "loss": 0.5174, "step": 11024 }, { "epoch": 1.4, "grad_norm": 0.5621256069500332, "learning_rate": 2.1493016565372764e-06, "loss": 0.4472, "step": 11025 }, { "epoch": 1.4, "grad_norm": 0.5990425563435783, "learning_rate": 2.1484542198504294e-06, "loss": 0.4666, "step": 11026 }, { "epoch": 1.4, "grad_norm": 0.7703837477702888, "learning_rate": 2.1476069045447756e-06, "loss": 0.5427, "step": 11027 }, { "epoch": 1.4, "grad_norm": 0.6400067325818959, "learning_rate": 2.1467597106563786e-06, "loss": 0.4565, "step": 11028 }, { "epoch": 1.41, "grad_norm": 0.6537877982122922, "learning_rate": 2.145912638221305e-06, "loss": 0.5026, "step": 11029 }, { "epoch": 1.41, "grad_norm": 0.7826774851046876, "learning_rate": 2.1450656872756086e-06, "loss": 0.5036, "step": 11030 }, { "epoch": 1.41, "grad_norm": 0.6172469896270728, "learning_rate": 2.144218857855343e-06, "loss": 0.4702, "step": 11031 }, { "epoch": 1.41, "grad_norm": 0.6895217052906698, "learning_rate": 2.1433721499965565e-06, "loss": 0.4968, "step": 11032 }, { "epoch": 1.41, "grad_norm": 0.6139194578998511, "learning_rate": 2.142525563735287e-06, "loss": 0.4577, "step": 11033 }, { "epoch": 1.41, "grad_norm": 0.7523131711554061, "learning_rate": 2.141679099107575e-06, "loss": 0.5359, "step": 11034 }, { "epoch": 1.41, "grad_norm": 0.8977456841423956, "learning_rate": 2.1408327561494485e-06, "loss": 0.5379, "step": 11035 }, { "epoch": 1.41, "grad_norm": 0.5891850449584672, "learning_rate": 2.1399865348969345e-06, "loss": 0.4571, "step": 11036 }, { "epoch": 1.41, "grad_norm": 0.5442590572440746, "learning_rate": 2.1391404353860562e-06, "loss": 0.4469, "step": 11037 }, { "epoch": 1.41, "grad_norm": 0.7141649666533418, "learning_rate": 2.1382944576528257e-06, "loss": 0.5459, "step": 11038 }, { "epoch": 1.41, "grad_norm": 0.7935313665736218, "learning_rate": 2.137448601733258e-06, "loss": 0.5358, "step": 11039 }, { "epoch": 1.41, "grad_norm": 0.849258668729484, "learning_rate": 2.1366028676633534e-06, "loss": 0.5159, "step": 11040 }, { "epoch": 1.41, "grad_norm": 0.7024594752468707, "learning_rate": 2.1357572554791166e-06, "loss": 0.4733, "step": 11041 }, { "epoch": 1.41, "grad_norm": 0.602915841238694, "learning_rate": 2.1349117652165384e-06, "loss": 0.4291, "step": 11042 }, { "epoch": 1.41, "grad_norm": 0.6994290517955272, "learning_rate": 2.1340663969116117e-06, "loss": 0.4836, "step": 11043 }, { "epoch": 1.41, "grad_norm": 0.7290551463236109, "learning_rate": 2.1332211506003215e-06, "loss": 0.5759, "step": 11044 }, { "epoch": 1.41, "grad_norm": 0.7593836673540703, "learning_rate": 2.1323760263186442e-06, "loss": 0.5106, "step": 11045 }, { "epoch": 1.41, "grad_norm": 1.569954565746356, "learning_rate": 2.1315310241025576e-06, "loss": 0.481, "step": 11046 }, { "epoch": 1.41, "grad_norm": 0.7105864612712174, "learning_rate": 2.130686143988027e-06, "loss": 0.4888, "step": 11047 }, { "epoch": 1.41, "grad_norm": 0.7373290698642639, "learning_rate": 2.1298413860110207e-06, "loss": 0.4938, "step": 11048 }, { "epoch": 1.41, "grad_norm": 0.6343770581486133, "learning_rate": 2.1289967502074927e-06, "loss": 0.4557, "step": 11049 }, { "epoch": 1.41, "grad_norm": 0.5774901260631604, "learning_rate": 2.1281522366134007e-06, "loss": 0.4714, "step": 11050 }, { "epoch": 1.41, "grad_norm": 0.6454348805802032, "learning_rate": 2.127307845264689e-06, "loss": 0.5188, "step": 11051 }, { "epoch": 1.41, "grad_norm": 1.109335767711921, "learning_rate": 2.126463576197304e-06, "loss": 0.4535, "step": 11052 }, { "epoch": 1.41, "grad_norm": 0.6783969564126169, "learning_rate": 2.1256194294471806e-06, "loss": 0.4939, "step": 11053 }, { "epoch": 1.41, "grad_norm": 0.766607386119204, "learning_rate": 2.1247754050502545e-06, "loss": 0.5071, "step": 11054 }, { "epoch": 1.41, "grad_norm": 0.6764714887012292, "learning_rate": 2.123931503042448e-06, "loss": 0.4915, "step": 11055 }, { "epoch": 1.41, "grad_norm": 0.67241679643197, "learning_rate": 2.1230877234596904e-06, "loss": 0.4741, "step": 11056 }, { "epoch": 1.41, "grad_norm": 0.6111770577975992, "learning_rate": 2.122244066337893e-06, "loss": 0.437, "step": 11057 }, { "epoch": 1.41, "grad_norm": 0.6379618823997341, "learning_rate": 2.1214005317129714e-06, "loss": 0.4311, "step": 11058 }, { "epoch": 1.41, "grad_norm": 0.6652716344344426, "learning_rate": 2.120557119620829e-06, "loss": 0.5059, "step": 11059 }, { "epoch": 1.41, "grad_norm": 0.6668437273619943, "learning_rate": 2.119713830097371e-06, "loss": 0.5124, "step": 11060 }, { "epoch": 1.41, "grad_norm": 0.7391629689550185, "learning_rate": 2.118870663178488e-06, "loss": 0.5147, "step": 11061 }, { "epoch": 1.41, "grad_norm": 0.7573888819210415, "learning_rate": 2.118027618900077e-06, "loss": 0.557, "step": 11062 }, { "epoch": 1.41, "grad_norm": 0.5660830013577599, "learning_rate": 2.1171846972980185e-06, "loss": 0.455, "step": 11063 }, { "epoch": 1.41, "grad_norm": 0.5414229853801957, "learning_rate": 2.116341898408198e-06, "loss": 0.4536, "step": 11064 }, { "epoch": 1.41, "grad_norm": 0.656530204708304, "learning_rate": 2.1154992222664857e-06, "loss": 0.4856, "step": 11065 }, { "epoch": 1.41, "grad_norm": 0.5606991332043892, "learning_rate": 2.114656668908755e-06, "loss": 0.4488, "step": 11066 }, { "epoch": 1.41, "grad_norm": 0.6274954726238137, "learning_rate": 2.113814238370867e-06, "loss": 0.4774, "step": 11067 }, { "epoch": 1.41, "grad_norm": 0.7467665757531308, "learning_rate": 2.112971930688687e-06, "loss": 0.5125, "step": 11068 }, { "epoch": 1.41, "grad_norm": 0.6857575984261297, "learning_rate": 2.112129745898065e-06, "loss": 0.4938, "step": 11069 }, { "epoch": 1.41, "grad_norm": 0.8540164108230414, "learning_rate": 2.1112876840348527e-06, "loss": 0.5532, "step": 11070 }, { "epoch": 1.41, "grad_norm": 0.7300724933284729, "learning_rate": 2.110445745134891e-06, "loss": 0.5386, "step": 11071 }, { "epoch": 1.41, "grad_norm": 0.6618732898394578, "learning_rate": 2.109603929234022e-06, "loss": 0.4534, "step": 11072 }, { "epoch": 1.41, "grad_norm": 0.7967401665876949, "learning_rate": 2.108762236368076e-06, "loss": 0.4595, "step": 11073 }, { "epoch": 1.41, "grad_norm": 0.6252598075712752, "learning_rate": 2.1079206665728835e-06, "loss": 0.4535, "step": 11074 }, { "epoch": 1.41, "grad_norm": 0.658759871928441, "learning_rate": 2.1070792198842654e-06, "loss": 0.4671, "step": 11075 }, { "epoch": 1.41, "grad_norm": 0.7109059281672796, "learning_rate": 2.106237896338042e-06, "loss": 0.5544, "step": 11076 }, { "epoch": 1.41, "grad_norm": 0.825859182314527, "learning_rate": 2.1053966959700223e-06, "loss": 0.5077, "step": 11077 }, { "epoch": 1.41, "grad_norm": 0.6402100340838714, "learning_rate": 2.104555618816018e-06, "loss": 0.4498, "step": 11078 }, { "epoch": 1.41, "grad_norm": 0.7458329944559269, "learning_rate": 2.1037146649118256e-06, "loss": 0.5309, "step": 11079 }, { "epoch": 1.41, "grad_norm": 0.7952858746286077, "learning_rate": 2.1028738342932453e-06, "loss": 0.5535, "step": 11080 }, { "epoch": 1.41, "grad_norm": 0.8016345419733305, "learning_rate": 2.1020331269960676e-06, "loss": 0.4869, "step": 11081 }, { "epoch": 1.41, "grad_norm": 0.7268533641558307, "learning_rate": 2.101192543056081e-06, "loss": 0.492, "step": 11082 }, { "epoch": 1.41, "grad_norm": 0.7324600575608072, "learning_rate": 2.100352082509063e-06, "loss": 0.5313, "step": 11083 }, { "epoch": 1.41, "grad_norm": 0.8198207889099383, "learning_rate": 2.0995117453907927e-06, "loss": 0.5854, "step": 11084 }, { "epoch": 1.41, "grad_norm": 0.74181732426242, "learning_rate": 2.098671531737037e-06, "loss": 0.5267, "step": 11085 }, { "epoch": 1.41, "grad_norm": 0.8123453098551288, "learning_rate": 2.0978314415835643e-06, "loss": 0.5246, "step": 11086 }, { "epoch": 1.41, "grad_norm": 0.663872066440253, "learning_rate": 2.096991474966132e-06, "loss": 0.52, "step": 11087 }, { "epoch": 1.41, "grad_norm": 0.5752085595551722, "learning_rate": 2.0961516319204973e-06, "loss": 0.4402, "step": 11088 }, { "epoch": 1.41, "grad_norm": 0.5324093230432396, "learning_rate": 2.095311912482406e-06, "loss": 0.4791, "step": 11089 }, { "epoch": 1.41, "grad_norm": 0.7119248843311933, "learning_rate": 2.094472316687607e-06, "loss": 0.4677, "step": 11090 }, { "epoch": 1.41, "grad_norm": 0.7149301728187657, "learning_rate": 2.0936328445718348e-06, "loss": 0.5514, "step": 11091 }, { "epoch": 1.41, "grad_norm": 0.7072166361555027, "learning_rate": 2.0927934961708248e-06, "loss": 0.5195, "step": 11092 }, { "epoch": 1.41, "grad_norm": 0.6971844451125663, "learning_rate": 2.0919542715203072e-06, "loss": 0.5274, "step": 11093 }, { "epoch": 1.41, "grad_norm": 0.5999122124993302, "learning_rate": 2.0911151706560024e-06, "loss": 0.4643, "step": 11094 }, { "epoch": 1.41, "grad_norm": 0.5763592098279886, "learning_rate": 2.0902761936136305e-06, "loss": 0.4263, "step": 11095 }, { "epoch": 1.41, "grad_norm": 0.5607236319769412, "learning_rate": 2.0894373404289013e-06, "loss": 0.445, "step": 11096 }, { "epoch": 1.41, "grad_norm": 0.5850345854981462, "learning_rate": 2.0885986111375234e-06, "loss": 0.4734, "step": 11097 }, { "epoch": 1.41, "grad_norm": 0.8683882082768954, "learning_rate": 2.0877600057752014e-06, "loss": 0.5556, "step": 11098 }, { "epoch": 1.41, "grad_norm": 0.6896501195309843, "learning_rate": 2.0869215243776276e-06, "loss": 0.5399, "step": 11099 }, { "epoch": 1.41, "grad_norm": 0.7537990629141019, "learning_rate": 2.0860831669804978e-06, "loss": 0.5455, "step": 11100 }, { "epoch": 1.41, "grad_norm": 0.7181973726146353, "learning_rate": 2.0852449336194943e-06, "loss": 0.5286, "step": 11101 }, { "epoch": 1.41, "grad_norm": 0.7061407701195933, "learning_rate": 2.0844068243303006e-06, "loss": 0.4676, "step": 11102 }, { "epoch": 1.41, "grad_norm": 0.657599665368253, "learning_rate": 2.0835688391485903e-06, "loss": 0.4559, "step": 11103 }, { "epoch": 1.41, "grad_norm": 0.738946946170603, "learning_rate": 2.0827309781100354e-06, "loss": 0.4881, "step": 11104 }, { "epoch": 1.41, "grad_norm": 0.5833938737411153, "learning_rate": 2.081893241250302e-06, "loss": 0.4426, "step": 11105 }, { "epoch": 1.41, "grad_norm": 0.6785732203369954, "learning_rate": 2.081055628605046e-06, "loss": 0.4466, "step": 11106 }, { "epoch": 1.41, "grad_norm": 0.6813897254589607, "learning_rate": 2.080218140209927e-06, "loss": 0.4964, "step": 11107 }, { "epoch": 1.42, "grad_norm": 0.6696063341332344, "learning_rate": 2.079380776100589e-06, "loss": 0.4914, "step": 11108 }, { "epoch": 1.42, "grad_norm": 0.8094250466191285, "learning_rate": 2.0785435363126806e-06, "loss": 0.5014, "step": 11109 }, { "epoch": 1.42, "grad_norm": 0.6141977248208548, "learning_rate": 2.077706420881836e-06, "loss": 0.5114, "step": 11110 }, { "epoch": 1.42, "grad_norm": 0.7190070221409822, "learning_rate": 2.076869429843693e-06, "loss": 0.4776, "step": 11111 }, { "epoch": 1.42, "grad_norm": 0.6745433180701251, "learning_rate": 2.0760325632338753e-06, "loss": 0.528, "step": 11112 }, { "epoch": 1.42, "grad_norm": 0.7104035344480847, "learning_rate": 2.0751958210880097e-06, "loss": 0.5019, "step": 11113 }, { "epoch": 1.42, "grad_norm": 0.602695777505395, "learning_rate": 2.07435920344171e-06, "loss": 0.4308, "step": 11114 }, { "epoch": 1.42, "grad_norm": 0.5926985940921856, "learning_rate": 2.073522710330591e-06, "loss": 0.5058, "step": 11115 }, { "epoch": 1.42, "grad_norm": 0.7151406497260272, "learning_rate": 2.0726863417902576e-06, "loss": 0.5797, "step": 11116 }, { "epoch": 1.42, "grad_norm": 0.712438332720332, "learning_rate": 2.0718500978563115e-06, "loss": 0.4697, "step": 11117 }, { "epoch": 1.42, "grad_norm": 0.5642295050730727, "learning_rate": 2.0710139785643508e-06, "loss": 0.4312, "step": 11118 }, { "epoch": 1.42, "grad_norm": 0.6154997107422874, "learning_rate": 2.0701779839499665e-06, "loss": 0.4491, "step": 11119 }, { "epoch": 1.42, "grad_norm": 0.8665712240284359, "learning_rate": 2.069342114048741e-06, "loss": 0.5318, "step": 11120 }, { "epoch": 1.42, "grad_norm": 0.664008458206658, "learning_rate": 2.0685063688962587e-06, "loss": 0.5069, "step": 11121 }, { "epoch": 1.42, "grad_norm": 0.7600195362182983, "learning_rate": 2.067670748528091e-06, "loss": 0.4687, "step": 11122 }, { "epoch": 1.42, "grad_norm": 0.6821995629661899, "learning_rate": 2.0668352529798107e-06, "loss": 0.4949, "step": 11123 }, { "epoch": 1.42, "grad_norm": 0.6727349306651959, "learning_rate": 2.0659998822869786e-06, "loss": 0.4401, "step": 11124 }, { "epoch": 1.42, "grad_norm": 0.6592809234476772, "learning_rate": 2.065164636485158e-06, "loss": 0.4829, "step": 11125 }, { "epoch": 1.42, "grad_norm": 0.7127709604367063, "learning_rate": 2.0643295156098988e-06, "loss": 0.4981, "step": 11126 }, { "epoch": 1.42, "grad_norm": 0.6462875817956245, "learning_rate": 2.063494519696752e-06, "loss": 0.4907, "step": 11127 }, { "epoch": 1.42, "grad_norm": 0.7877815350100381, "learning_rate": 2.062659648781259e-06, "loss": 0.5668, "step": 11128 }, { "epoch": 1.42, "grad_norm": 0.7131156581817333, "learning_rate": 2.061824902898958e-06, "loss": 0.4769, "step": 11129 }, { "epoch": 1.42, "grad_norm": 0.5986380935803617, "learning_rate": 2.0609902820853813e-06, "loss": 0.4739, "step": 11130 }, { "epoch": 1.42, "grad_norm": 0.7528864095873341, "learning_rate": 2.060155786376059e-06, "loss": 0.512, "step": 11131 }, { "epoch": 1.42, "grad_norm": 0.6173057949871051, "learning_rate": 2.0593214158065086e-06, "loss": 0.4628, "step": 11132 }, { "epoch": 1.42, "grad_norm": 0.6378019275944758, "learning_rate": 2.0584871704122494e-06, "loss": 0.4462, "step": 11133 }, { "epoch": 1.42, "grad_norm": 0.7292249256808478, "learning_rate": 2.0576530502287908e-06, "loss": 0.4834, "step": 11134 }, { "epoch": 1.42, "grad_norm": 0.707451097709552, "learning_rate": 2.056819055291641e-06, "loss": 0.4663, "step": 11135 }, { "epoch": 1.42, "grad_norm": 0.5947493844942456, "learning_rate": 2.0559851856362973e-06, "loss": 0.4755, "step": 11136 }, { "epoch": 1.42, "grad_norm": 0.7038553669745728, "learning_rate": 2.0551514412982583e-06, "loss": 0.4593, "step": 11137 }, { "epoch": 1.42, "grad_norm": 0.6810359109290924, "learning_rate": 2.05431782231301e-06, "loss": 0.4577, "step": 11138 }, { "epoch": 1.42, "grad_norm": 0.8146909236585748, "learning_rate": 2.0534843287160404e-06, "loss": 0.4712, "step": 11139 }, { "epoch": 1.42, "grad_norm": 0.7802695331532603, "learning_rate": 2.0526509605428264e-06, "loss": 0.5225, "step": 11140 }, { "epoch": 1.42, "grad_norm": 0.7135752475066814, "learning_rate": 2.0518177178288435e-06, "loss": 0.4605, "step": 11141 }, { "epoch": 1.42, "grad_norm": 0.5737366886981783, "learning_rate": 2.0509846006095562e-06, "loss": 0.4439, "step": 11142 }, { "epoch": 1.42, "grad_norm": 0.5678085890581392, "learning_rate": 2.0501516089204344e-06, "loss": 0.445, "step": 11143 }, { "epoch": 1.42, "grad_norm": 0.7178302723252884, "learning_rate": 2.04931874279693e-06, "loss": 0.5514, "step": 11144 }, { "epoch": 1.42, "grad_norm": 0.7097429745694298, "learning_rate": 2.0484860022745003e-06, "loss": 0.4709, "step": 11145 }, { "epoch": 1.42, "grad_norm": 0.6620569937354427, "learning_rate": 2.0476533873885873e-06, "loss": 0.4507, "step": 11146 }, { "epoch": 1.42, "grad_norm": 0.6340940340417469, "learning_rate": 2.046820898174637e-06, "loss": 0.4386, "step": 11147 }, { "epoch": 1.42, "grad_norm": 0.6325414899016335, "learning_rate": 2.0459885346680825e-06, "loss": 0.5396, "step": 11148 }, { "epoch": 1.42, "grad_norm": 0.7611898111676605, "learning_rate": 2.0451562969043582e-06, "loss": 0.5393, "step": 11149 }, { "epoch": 1.42, "grad_norm": 0.7448090154732241, "learning_rate": 2.044324184918886e-06, "loss": 0.5038, "step": 11150 }, { "epoch": 1.42, "grad_norm": 0.6488845329716524, "learning_rate": 2.04349219874709e-06, "loss": 0.4581, "step": 11151 }, { "epoch": 1.42, "grad_norm": 0.6951950446896673, "learning_rate": 2.042660338424382e-06, "loss": 0.4794, "step": 11152 }, { "epoch": 1.42, "grad_norm": 1.1265786487627243, "learning_rate": 2.0418286039861752e-06, "loss": 0.5148, "step": 11153 }, { "epoch": 1.42, "grad_norm": 0.7184494276937451, "learning_rate": 2.0409969954678695e-06, "loss": 0.4835, "step": 11154 }, { "epoch": 1.42, "grad_norm": 0.6291749993280014, "learning_rate": 2.040165512904866e-06, "loss": 0.4895, "step": 11155 }, { "epoch": 1.42, "grad_norm": 0.5923525579805895, "learning_rate": 2.0393341563325603e-06, "loss": 0.421, "step": 11156 }, { "epoch": 1.42, "grad_norm": 0.6847813843617118, "learning_rate": 2.038502925786337e-06, "loss": 0.464, "step": 11157 }, { "epoch": 1.42, "grad_norm": 0.6029038851321042, "learning_rate": 2.0376718213015824e-06, "loss": 0.4372, "step": 11158 }, { "epoch": 1.42, "grad_norm": 0.7022769911202208, "learning_rate": 2.0368408429136697e-06, "loss": 0.5035, "step": 11159 }, { "epoch": 1.42, "grad_norm": 0.6747759573489848, "learning_rate": 2.036009990657974e-06, "loss": 0.4732, "step": 11160 }, { "epoch": 1.42, "grad_norm": 0.7743521332684044, "learning_rate": 2.035179264569863e-06, "loss": 0.4949, "step": 11161 }, { "epoch": 1.42, "grad_norm": 0.7944481008965669, "learning_rate": 2.0343486646846945e-06, "loss": 0.5759, "step": 11162 }, { "epoch": 1.42, "grad_norm": 0.7901209155580213, "learning_rate": 2.0335181910378286e-06, "loss": 0.5553, "step": 11163 }, { "epoch": 1.42, "grad_norm": 0.6756152331872225, "learning_rate": 2.0326878436646118e-06, "loss": 0.4994, "step": 11164 }, { "epoch": 1.42, "grad_norm": 0.7415721565195527, "learning_rate": 2.0318576226003932e-06, "loss": 0.5608, "step": 11165 }, { "epoch": 1.42, "grad_norm": 0.7322104642579267, "learning_rate": 2.031027527880509e-06, "loss": 0.4858, "step": 11166 }, { "epoch": 1.42, "grad_norm": 0.7065383463815319, "learning_rate": 2.0301975595402954e-06, "loss": 0.4636, "step": 11167 }, { "epoch": 1.42, "grad_norm": 0.5976634669660313, "learning_rate": 2.029367717615084e-06, "loss": 0.4799, "step": 11168 }, { "epoch": 1.42, "grad_norm": 0.8025401793153247, "learning_rate": 2.0285380021401934e-06, "loss": 0.5876, "step": 11169 }, { "epoch": 1.42, "grad_norm": 0.671937427847929, "learning_rate": 2.0277084131509468e-06, "loss": 0.4363, "step": 11170 }, { "epoch": 1.42, "grad_norm": 0.7865110059584666, "learning_rate": 2.026878950682653e-06, "loss": 0.5837, "step": 11171 }, { "epoch": 1.42, "grad_norm": 0.8299314315854112, "learning_rate": 2.0260496147706233e-06, "loss": 0.5397, "step": 11172 }, { "epoch": 1.42, "grad_norm": 0.7134080059598646, "learning_rate": 2.025220405450157e-06, "loss": 0.4863, "step": 11173 }, { "epoch": 1.42, "grad_norm": 0.6397844984212663, "learning_rate": 2.0243913227565526e-06, "loss": 0.5193, "step": 11174 }, { "epoch": 1.42, "grad_norm": 0.9453151750863827, "learning_rate": 2.0235623667250996e-06, "loss": 0.5469, "step": 11175 }, { "epoch": 1.42, "grad_norm": 0.7859150893954437, "learning_rate": 2.022733537391088e-06, "loss": 0.5461, "step": 11176 }, { "epoch": 1.42, "grad_norm": 0.827419865591755, "learning_rate": 2.021904834789793e-06, "loss": 0.546, "step": 11177 }, { "epoch": 1.42, "grad_norm": 0.7229751042227601, "learning_rate": 2.0210762589564942e-06, "loss": 0.5172, "step": 11178 }, { "epoch": 1.42, "grad_norm": 0.6197193337111703, "learning_rate": 2.0202478099264565e-06, "loss": 0.4905, "step": 11179 }, { "epoch": 1.42, "grad_norm": 0.7552588795551329, "learning_rate": 2.0194194877349516e-06, "loss": 0.4981, "step": 11180 }, { "epoch": 1.42, "grad_norm": 0.8029246219288291, "learning_rate": 2.0185912924172325e-06, "loss": 0.5319, "step": 11181 }, { "epoch": 1.42, "grad_norm": 0.6915913682263175, "learning_rate": 2.017763224008556e-06, "loss": 0.5061, "step": 11182 }, { "epoch": 1.42, "grad_norm": 0.7326998335946943, "learning_rate": 2.016935282544168e-06, "loss": 0.5124, "step": 11183 }, { "epoch": 1.42, "grad_norm": 0.7469400014112825, "learning_rate": 2.0161074680593144e-06, "loss": 0.5211, "step": 11184 }, { "epoch": 1.42, "grad_norm": 0.6264426768938137, "learning_rate": 2.0152797805892286e-06, "loss": 0.4671, "step": 11185 }, { "epoch": 1.43, "grad_norm": 0.5935187660313015, "learning_rate": 2.014452220169147e-06, "loss": 0.4217, "step": 11186 }, { "epoch": 1.43, "grad_norm": 0.782048419085864, "learning_rate": 2.013624786834292e-06, "loss": 0.5533, "step": 11187 }, { "epoch": 1.43, "grad_norm": 0.856726064302872, "learning_rate": 2.012797480619888e-06, "loss": 0.5277, "step": 11188 }, { "epoch": 1.43, "grad_norm": 0.9184237425366315, "learning_rate": 2.011970301561148e-06, "loss": 0.4777, "step": 11189 }, { "epoch": 1.43, "grad_norm": 0.6465520845948611, "learning_rate": 2.011143249693286e-06, "loss": 0.4747, "step": 11190 }, { "epoch": 1.43, "grad_norm": 0.6003114960282584, "learning_rate": 2.0103163250515007e-06, "loss": 0.4776, "step": 11191 }, { "epoch": 1.43, "grad_norm": 0.7659828507649711, "learning_rate": 2.009489527671e-06, "loss": 0.5272, "step": 11192 }, { "epoch": 1.43, "grad_norm": 0.6893242718725018, "learning_rate": 2.0086628575869716e-06, "loss": 0.449, "step": 11193 }, { "epoch": 1.43, "grad_norm": 0.6823328161925785, "learning_rate": 2.007836314834608e-06, "loss": 0.4649, "step": 11194 }, { "epoch": 1.43, "grad_norm": 0.630098347005655, "learning_rate": 2.0070098994490893e-06, "loss": 0.4504, "step": 11195 }, { "epoch": 1.43, "grad_norm": 0.6010447452932413, "learning_rate": 2.006183611465597e-06, "loss": 0.4465, "step": 11196 }, { "epoch": 1.43, "grad_norm": 0.6568021747081857, "learning_rate": 2.0053574509192996e-06, "loss": 0.4214, "step": 11197 }, { "epoch": 1.43, "grad_norm": 0.6959581616117472, "learning_rate": 2.004531417845368e-06, "loss": 0.5096, "step": 11198 }, { "epoch": 1.43, "grad_norm": 0.6687655739177488, "learning_rate": 2.0037055122789594e-06, "loss": 0.518, "step": 11199 }, { "epoch": 1.43, "grad_norm": 0.8096255649297281, "learning_rate": 2.0028797342552346e-06, "loss": 0.4608, "step": 11200 }, { "epoch": 1.43, "grad_norm": 0.5901434615746427, "learning_rate": 2.00205408380934e-06, "loss": 0.4439, "step": 11201 }, { "epoch": 1.43, "grad_norm": 0.6712716973862201, "learning_rate": 2.0012285609764255e-06, "loss": 0.4618, "step": 11202 }, { "epoch": 1.43, "grad_norm": 0.5695432627942246, "learning_rate": 2.0004031657916254e-06, "loss": 0.3809, "step": 11203 }, { "epoch": 1.43, "grad_norm": 0.5874289189650648, "learning_rate": 1.9995778982900783e-06, "loss": 0.4209, "step": 11204 }, { "epoch": 1.43, "grad_norm": 0.5770341747333945, "learning_rate": 1.9987527585069117e-06, "loss": 0.3952, "step": 11205 }, { "epoch": 1.43, "grad_norm": 0.5818497800879188, "learning_rate": 1.997927746477251e-06, "loss": 0.4737, "step": 11206 }, { "epoch": 1.43, "grad_norm": 0.7840189837384709, "learning_rate": 1.9971028622362116e-06, "loss": 0.5592, "step": 11207 }, { "epoch": 1.43, "grad_norm": 0.652611209058209, "learning_rate": 1.996278105818909e-06, "loss": 0.4959, "step": 11208 }, { "epoch": 1.43, "grad_norm": 0.7051516265529143, "learning_rate": 1.9954534772604473e-06, "loss": 0.5215, "step": 11209 }, { "epoch": 1.43, "grad_norm": 0.7113256398763603, "learning_rate": 1.994628976595932e-06, "loss": 0.565, "step": 11210 }, { "epoch": 1.43, "grad_norm": 0.6076305234717412, "learning_rate": 1.993804603860455e-06, "loss": 0.4279, "step": 11211 }, { "epoch": 1.43, "grad_norm": 0.5894178483904796, "learning_rate": 1.9929803590891115e-06, "loss": 0.4303, "step": 11212 }, { "epoch": 1.43, "grad_norm": 0.6180690102402564, "learning_rate": 1.992156242316984e-06, "loss": 0.4802, "step": 11213 }, { "epoch": 1.43, "grad_norm": 0.8086175978414927, "learning_rate": 1.991332253579154e-06, "loss": 0.5127, "step": 11214 }, { "epoch": 1.43, "grad_norm": 0.6086401440154163, "learning_rate": 1.990508392910695e-06, "loss": 0.4643, "step": 11215 }, { "epoch": 1.43, "grad_norm": 0.7020827946931136, "learning_rate": 1.9896846603466762e-06, "loss": 0.4307, "step": 11216 }, { "epoch": 1.43, "grad_norm": 0.6059436710261036, "learning_rate": 1.988861055922164e-06, "loss": 0.5241, "step": 11217 }, { "epoch": 1.43, "grad_norm": 0.7541097747004496, "learning_rate": 1.9880375796722116e-06, "loss": 0.4994, "step": 11218 }, { "epoch": 1.43, "grad_norm": 0.56993310040587, "learning_rate": 1.9872142316318775e-06, "loss": 0.4395, "step": 11219 }, { "epoch": 1.43, "grad_norm": 0.7569533892047728, "learning_rate": 1.986391011836204e-06, "loss": 0.5588, "step": 11220 }, { "epoch": 1.43, "grad_norm": 0.6036030514550667, "learning_rate": 1.985567920320235e-06, "loss": 0.4207, "step": 11221 }, { "epoch": 1.43, "grad_norm": 0.580993050558816, "learning_rate": 1.9847449571190087e-06, "loss": 0.4573, "step": 11222 }, { "epoch": 1.43, "grad_norm": 0.7089009313971928, "learning_rate": 1.983922122267552e-06, "loss": 0.4194, "step": 11223 }, { "epoch": 1.43, "grad_norm": 0.6714513655693373, "learning_rate": 1.983099415800895e-06, "loss": 0.4894, "step": 11224 }, { "epoch": 1.43, "grad_norm": 0.6338727037309118, "learning_rate": 1.982276837754053e-06, "loss": 0.504, "step": 11225 }, { "epoch": 1.43, "grad_norm": 0.705206243507172, "learning_rate": 1.9814543881620447e-06, "loss": 0.4273, "step": 11226 }, { "epoch": 1.43, "grad_norm": 0.657490437945099, "learning_rate": 1.980632067059875e-06, "loss": 0.4972, "step": 11227 }, { "epoch": 1.43, "grad_norm": 0.7166418058104774, "learning_rate": 1.97980987448255e-06, "loss": 0.5376, "step": 11228 }, { "epoch": 1.43, "grad_norm": 0.8066864628986806, "learning_rate": 1.9789878104650694e-06, "loss": 0.5398, "step": 11229 }, { "epoch": 1.43, "grad_norm": 0.6102186772099529, "learning_rate": 1.978165875042422e-06, "loss": 0.4401, "step": 11230 }, { "epoch": 1.43, "grad_norm": 0.6344805523278496, "learning_rate": 1.977344068249599e-06, "loss": 0.4665, "step": 11231 }, { "epoch": 1.43, "grad_norm": 0.5943139674369584, "learning_rate": 1.976522390121578e-06, "loss": 0.4631, "step": 11232 }, { "epoch": 1.43, "grad_norm": 0.7749393310510425, "learning_rate": 1.9757008406933385e-06, "loss": 0.546, "step": 11233 }, { "epoch": 1.43, "grad_norm": 0.8174546481772259, "learning_rate": 1.974879419999849e-06, "loss": 0.5262, "step": 11234 }, { "epoch": 1.43, "grad_norm": 0.7244544058728543, "learning_rate": 1.9740581280760767e-06, "loss": 0.5439, "step": 11235 }, { "epoch": 1.43, "grad_norm": 0.7589498656694413, "learning_rate": 1.9732369649569794e-06, "loss": 0.5254, "step": 11236 }, { "epoch": 1.43, "grad_norm": 0.6228498479634623, "learning_rate": 1.9724159306775136e-06, "loss": 0.4858, "step": 11237 }, { "epoch": 1.43, "grad_norm": 0.7168217106689247, "learning_rate": 1.971595025272625e-06, "loss": 0.4725, "step": 11238 }, { "epoch": 1.43, "grad_norm": 0.7366200843304248, "learning_rate": 1.9707742487772607e-06, "loss": 0.4036, "step": 11239 }, { "epoch": 1.43, "grad_norm": 0.6148224541790992, "learning_rate": 1.9699536012263546e-06, "loss": 0.472, "step": 11240 }, { "epoch": 1.43, "grad_norm": 0.7975209229405948, "learning_rate": 1.9691330826548415e-06, "loss": 0.5936, "step": 11241 }, { "epoch": 1.43, "grad_norm": 0.7679398132762768, "learning_rate": 1.9683126930976477e-06, "loss": 0.5078, "step": 11242 }, { "epoch": 1.43, "grad_norm": 0.7457473173654783, "learning_rate": 1.967492432589696e-06, "loss": 0.5207, "step": 11243 }, { "epoch": 1.43, "grad_norm": 0.8982439231148711, "learning_rate": 1.9666723011659e-06, "loss": 0.5527, "step": 11244 }, { "epoch": 1.43, "grad_norm": 0.7989003897825304, "learning_rate": 1.965852298861172e-06, "loss": 0.5473, "step": 11245 }, { "epoch": 1.43, "grad_norm": 0.7330452786755091, "learning_rate": 1.965032425710414e-06, "loss": 0.5118, "step": 11246 }, { "epoch": 1.43, "grad_norm": 0.6442375600589755, "learning_rate": 1.9642126817485297e-06, "loss": 0.4427, "step": 11247 }, { "epoch": 1.43, "grad_norm": 0.6781118787117099, "learning_rate": 1.9633930670104084e-06, "loss": 0.4561, "step": 11248 }, { "epoch": 1.43, "grad_norm": 0.5674129129708307, "learning_rate": 1.9625735815309425e-06, "loss": 0.4501, "step": 11249 }, { "epoch": 1.43, "grad_norm": 0.8035437835222323, "learning_rate": 1.9617542253450116e-06, "loss": 0.5097, "step": 11250 }, { "epoch": 1.43, "grad_norm": 0.6751218774101819, "learning_rate": 1.960934998487496e-06, "loss": 0.5032, "step": 11251 }, { "epoch": 1.43, "grad_norm": 0.7490261223356152, "learning_rate": 1.960115900993264e-06, "loss": 0.5509, "step": 11252 }, { "epoch": 1.43, "grad_norm": 1.2838419789777458, "learning_rate": 1.9592969328971844e-06, "loss": 0.5794, "step": 11253 }, { "epoch": 1.43, "grad_norm": 0.7122731410537773, "learning_rate": 1.9584780942341177e-06, "loss": 0.4885, "step": 11254 }, { "epoch": 1.43, "grad_norm": 0.6522230509906171, "learning_rate": 1.95765938503892e-06, "loss": 0.436, "step": 11255 }, { "epoch": 1.43, "grad_norm": 0.7400595324739233, "learning_rate": 1.95684080534644e-06, "loss": 0.5197, "step": 11256 }, { "epoch": 1.43, "grad_norm": 0.648825901543588, "learning_rate": 1.956022355191523e-06, "loss": 0.4806, "step": 11257 }, { "epoch": 1.43, "grad_norm": 0.7492382274165247, "learning_rate": 1.955204034609006e-06, "loss": 0.5588, "step": 11258 }, { "epoch": 1.43, "grad_norm": 0.7775395558613977, "learning_rate": 1.9543858436337254e-06, "loss": 0.5107, "step": 11259 }, { "epoch": 1.43, "grad_norm": 0.6228703595391435, "learning_rate": 1.953567782300505e-06, "loss": 0.4804, "step": 11260 }, { "epoch": 1.43, "grad_norm": 0.8130732016887502, "learning_rate": 1.9527498506441704e-06, "loss": 0.5168, "step": 11261 }, { "epoch": 1.43, "grad_norm": 0.6351959209709755, "learning_rate": 1.9519320486995362e-06, "loss": 0.4897, "step": 11262 }, { "epoch": 1.43, "grad_norm": 0.7671735089353404, "learning_rate": 1.951114376501416e-06, "loss": 0.4768, "step": 11263 }, { "epoch": 1.43, "grad_norm": 0.6259057856947268, "learning_rate": 1.9502968340846118e-06, "loss": 0.4697, "step": 11264 }, { "epoch": 1.44, "grad_norm": 0.7246265908714671, "learning_rate": 1.9494794214839276e-06, "loss": 0.5399, "step": 11265 }, { "epoch": 1.44, "grad_norm": 0.9907033676110238, "learning_rate": 1.9486621387341533e-06, "loss": 0.5314, "step": 11266 }, { "epoch": 1.44, "grad_norm": 0.7606284064873436, "learning_rate": 1.9478449858700843e-06, "loss": 0.5115, "step": 11267 }, { "epoch": 1.44, "grad_norm": 0.7989713122800921, "learning_rate": 1.947027962926499e-06, "loss": 0.4791, "step": 11268 }, { "epoch": 1.44, "grad_norm": 0.5817381244530586, "learning_rate": 1.94621106993818e-06, "loss": 0.4393, "step": 11269 }, { "epoch": 1.44, "grad_norm": 0.7096321112885071, "learning_rate": 1.9453943069398944e-06, "loss": 0.4987, "step": 11270 }, { "epoch": 1.44, "grad_norm": 0.876752355322076, "learning_rate": 1.944577673966415e-06, "loss": 0.531, "step": 11271 }, { "epoch": 1.44, "grad_norm": 0.7602148269568045, "learning_rate": 1.943761171052497e-06, "loss": 0.5576, "step": 11272 }, { "epoch": 1.44, "grad_norm": 0.8329920170953107, "learning_rate": 1.9429447982329024e-06, "loss": 0.5284, "step": 11273 }, { "epoch": 1.44, "grad_norm": 0.684407161842788, "learning_rate": 1.9421285555423767e-06, "loss": 0.491, "step": 11274 }, { "epoch": 1.44, "grad_norm": 0.7134658758564494, "learning_rate": 1.9413124430156687e-06, "loss": 0.4671, "step": 11275 }, { "epoch": 1.44, "grad_norm": 0.72173377341069, "learning_rate": 1.9404964606875144e-06, "loss": 0.5359, "step": 11276 }, { "epoch": 1.44, "grad_norm": 0.7418848216152063, "learning_rate": 1.93968060859265e-06, "loss": 0.5247, "step": 11277 }, { "epoch": 1.44, "grad_norm": 0.6680440353861876, "learning_rate": 1.9388648867658017e-06, "loss": 0.4564, "step": 11278 }, { "epoch": 1.44, "grad_norm": 0.7389863164262619, "learning_rate": 1.938049295241693e-06, "loss": 0.4955, "step": 11279 }, { "epoch": 1.44, "grad_norm": 0.5829233732668375, "learning_rate": 1.9372338340550427e-06, "loss": 0.4627, "step": 11280 }, { "epoch": 1.44, "grad_norm": 0.8067337924033198, "learning_rate": 1.936418503240559e-06, "loss": 0.5247, "step": 11281 }, { "epoch": 1.44, "grad_norm": 0.6413165594994173, "learning_rate": 1.9356033028329524e-06, "loss": 0.4939, "step": 11282 }, { "epoch": 1.44, "grad_norm": 0.7492170614952702, "learning_rate": 1.934788232866918e-06, "loss": 0.5465, "step": 11283 }, { "epoch": 1.44, "grad_norm": 0.5525736106000592, "learning_rate": 1.933973293377154e-06, "loss": 0.4346, "step": 11284 }, { "epoch": 1.44, "grad_norm": 0.7094188651103669, "learning_rate": 1.9331584843983513e-06, "loss": 0.477, "step": 11285 }, { "epoch": 1.44, "grad_norm": 0.5852625217234446, "learning_rate": 1.9323438059651895e-06, "loss": 0.4597, "step": 11286 }, { "epoch": 1.44, "grad_norm": 0.5883877451196234, "learning_rate": 1.9315292581123514e-06, "loss": 0.462, "step": 11287 }, { "epoch": 1.44, "grad_norm": 0.6345828121336127, "learning_rate": 1.930714840874505e-06, "loss": 0.487, "step": 11288 }, { "epoch": 1.44, "grad_norm": 0.7379326876688959, "learning_rate": 1.9299005542863213e-06, "loss": 0.5374, "step": 11289 }, { "epoch": 1.44, "grad_norm": 0.5923668077726157, "learning_rate": 1.9290863983824594e-06, "loss": 0.4956, "step": 11290 }, { "epoch": 1.44, "grad_norm": 1.449338058742408, "learning_rate": 1.9282723731975754e-06, "loss": 0.507, "step": 11291 }, { "epoch": 1.44, "grad_norm": 0.6693495554699556, "learning_rate": 1.927458478766323e-06, "loss": 0.4585, "step": 11292 }, { "epoch": 1.44, "grad_norm": 0.9453083517928659, "learning_rate": 1.9266447151233432e-06, "loss": 0.53, "step": 11293 }, { "epoch": 1.44, "grad_norm": 0.6765940905558698, "learning_rate": 1.925831082303278e-06, "loss": 0.5118, "step": 11294 }, { "epoch": 1.44, "grad_norm": 0.7102414801322271, "learning_rate": 1.925017580340759e-06, "loss": 0.4205, "step": 11295 }, { "epoch": 1.44, "grad_norm": 0.6814300141816841, "learning_rate": 1.924204209270416e-06, "loss": 0.4422, "step": 11296 }, { "epoch": 1.44, "grad_norm": 0.6169935658259402, "learning_rate": 1.9233909691268705e-06, "loss": 0.5047, "step": 11297 }, { "epoch": 1.44, "grad_norm": 0.6935651286225928, "learning_rate": 1.922577859944741e-06, "loss": 0.5163, "step": 11298 }, { "epoch": 1.44, "grad_norm": 0.6450800111967042, "learning_rate": 1.921764881758636e-06, "loss": 0.4421, "step": 11299 }, { "epoch": 1.44, "grad_norm": 0.620304074890015, "learning_rate": 1.9209520346031656e-06, "loss": 0.4345, "step": 11300 }, { "epoch": 1.44, "grad_norm": 0.7315790932606675, "learning_rate": 1.9201393185129256e-06, "loss": 0.4929, "step": 11301 }, { "epoch": 1.44, "grad_norm": 0.7004323220145465, "learning_rate": 1.919326733522515e-06, "loss": 0.5108, "step": 11302 }, { "epoch": 1.44, "grad_norm": 0.6271859644453363, "learning_rate": 1.918514279666518e-06, "loss": 0.451, "step": 11303 }, { "epoch": 1.44, "grad_norm": 0.6497526757629732, "learning_rate": 1.9177019569795235e-06, "loss": 0.4695, "step": 11304 }, { "epoch": 1.44, "grad_norm": 0.7336140160469972, "learning_rate": 1.916889765496106e-06, "loss": 0.534, "step": 11305 }, { "epoch": 1.44, "grad_norm": 0.662297858676538, "learning_rate": 1.9160777052508404e-06, "loss": 0.46, "step": 11306 }, { "epoch": 1.44, "grad_norm": 0.6510159096884519, "learning_rate": 1.915265776278291e-06, "loss": 0.5155, "step": 11307 }, { "epoch": 1.44, "grad_norm": 0.6823491584859847, "learning_rate": 1.9144539786130212e-06, "loss": 0.4816, "step": 11308 }, { "epoch": 1.44, "grad_norm": 0.7294871088530409, "learning_rate": 1.913642312289584e-06, "loss": 0.5134, "step": 11309 }, { "epoch": 1.44, "grad_norm": 0.5819265618427858, "learning_rate": 1.912830777342533e-06, "loss": 0.4401, "step": 11310 }, { "epoch": 1.44, "grad_norm": 0.6078669650032759, "learning_rate": 1.9120193738064088e-06, "loss": 0.4367, "step": 11311 }, { "epoch": 1.44, "grad_norm": 0.9069761228682602, "learning_rate": 1.9112081017157535e-06, "loss": 0.5096, "step": 11312 }, { "epoch": 1.44, "grad_norm": 0.6029898006326574, "learning_rate": 1.910396961105097e-06, "loss": 0.4402, "step": 11313 }, { "epoch": 1.44, "grad_norm": 0.6438412119564003, "learning_rate": 1.909585952008971e-06, "loss": 0.4172, "step": 11314 }, { "epoch": 1.44, "grad_norm": 0.5404246754231471, "learning_rate": 1.908775074461892e-06, "loss": 0.4303, "step": 11315 }, { "epoch": 1.44, "grad_norm": 0.5388010883116349, "learning_rate": 1.9079643284983836e-06, "loss": 0.3819, "step": 11316 }, { "epoch": 1.44, "grad_norm": 0.605711647623545, "learning_rate": 1.9071537141529512e-06, "loss": 0.4336, "step": 11317 }, { "epoch": 1.44, "grad_norm": 0.6829792860415591, "learning_rate": 1.9063432314601033e-06, "loss": 0.5251, "step": 11318 }, { "epoch": 1.44, "grad_norm": 0.5740065416349589, "learning_rate": 1.9055328804543367e-06, "loss": 0.4714, "step": 11319 }, { "epoch": 1.44, "grad_norm": 0.716931513077479, "learning_rate": 1.904722661170148e-06, "loss": 0.5264, "step": 11320 }, { "epoch": 1.44, "grad_norm": 0.671637368678671, "learning_rate": 1.9039125736420233e-06, "loss": 0.4914, "step": 11321 }, { "epoch": 1.44, "grad_norm": 0.620428701543499, "learning_rate": 1.9031026179044488e-06, "loss": 0.4891, "step": 11322 }, { "epoch": 1.44, "grad_norm": 0.768085628408398, "learning_rate": 1.9022927939918972e-06, "loss": 0.5227, "step": 11323 }, { "epoch": 1.44, "grad_norm": 0.6760972529286847, "learning_rate": 1.9014831019388447e-06, "loss": 0.4718, "step": 11324 }, { "epoch": 1.44, "grad_norm": 0.6478800558768326, "learning_rate": 1.9006735417797528e-06, "loss": 0.4785, "step": 11325 }, { "epoch": 1.44, "grad_norm": 0.647028749615993, "learning_rate": 1.8998641135490863e-06, "loss": 0.4857, "step": 11326 }, { "epoch": 1.44, "grad_norm": 0.6887815457774414, "learning_rate": 1.899054817281296e-06, "loss": 0.4534, "step": 11327 }, { "epoch": 1.44, "grad_norm": 0.6492984440097781, "learning_rate": 1.898245653010833e-06, "loss": 0.4748, "step": 11328 }, { "epoch": 1.44, "grad_norm": 0.585218737768615, "learning_rate": 1.8974366207721407e-06, "loss": 0.4185, "step": 11329 }, { "epoch": 1.44, "grad_norm": 0.6147387354800559, "learning_rate": 1.8966277205996592e-06, "loss": 0.4824, "step": 11330 }, { "epoch": 1.44, "grad_norm": 0.5514378080388994, "learning_rate": 1.8958189525278164e-06, "loss": 0.4462, "step": 11331 }, { "epoch": 1.44, "grad_norm": 0.6715572472281072, "learning_rate": 1.8950103165910433e-06, "loss": 0.425, "step": 11332 }, { "epoch": 1.44, "grad_norm": 0.6701754334618644, "learning_rate": 1.8942018128237571e-06, "loss": 0.4713, "step": 11333 }, { "epoch": 1.44, "grad_norm": 0.7383832353037361, "learning_rate": 1.8933934412603766e-06, "loss": 0.493, "step": 11334 }, { "epoch": 1.44, "grad_norm": 0.7137313548136808, "learning_rate": 1.892585201935308e-06, "loss": 0.4885, "step": 11335 }, { "epoch": 1.44, "grad_norm": 0.654454215507374, "learning_rate": 1.89177709488296e-06, "loss": 0.4481, "step": 11336 }, { "epoch": 1.44, "grad_norm": 0.6013489032851089, "learning_rate": 1.890969120137726e-06, "loss": 0.4469, "step": 11337 }, { "epoch": 1.44, "grad_norm": 0.6710988442836858, "learning_rate": 1.890161277734004e-06, "loss": 0.5161, "step": 11338 }, { "epoch": 1.44, "grad_norm": 0.7589133859914434, "learning_rate": 1.889353567706177e-06, "loss": 0.4744, "step": 11339 }, { "epoch": 1.44, "grad_norm": 0.5587001051874372, "learning_rate": 1.8885459900886282e-06, "loss": 0.4116, "step": 11340 }, { "epoch": 1.44, "grad_norm": 0.6221389837172623, "learning_rate": 1.8877385449157354e-06, "loss": 0.4751, "step": 11341 }, { "epoch": 1.44, "grad_norm": 0.7048839085214085, "learning_rate": 1.8869312322218658e-06, "loss": 0.4974, "step": 11342 }, { "epoch": 1.45, "grad_norm": 0.6847705843990224, "learning_rate": 1.8861240520413876e-06, "loss": 0.5328, "step": 11343 }, { "epoch": 1.45, "grad_norm": 0.6374377065226372, "learning_rate": 1.8853170044086562e-06, "loss": 0.4914, "step": 11344 }, { "epoch": 1.45, "grad_norm": 0.8486749525142614, "learning_rate": 1.8845100893580264e-06, "loss": 0.524, "step": 11345 }, { "epoch": 1.45, "grad_norm": 0.6157314279655526, "learning_rate": 1.8837033069238487e-06, "loss": 0.4885, "step": 11346 }, { "epoch": 1.45, "grad_norm": 0.7449133394383595, "learning_rate": 1.8828966571404606e-06, "loss": 0.5735, "step": 11347 }, { "epoch": 1.45, "grad_norm": 0.7700475046969686, "learning_rate": 1.8820901400422032e-06, "loss": 0.4702, "step": 11348 }, { "epoch": 1.45, "grad_norm": 0.6070447399975536, "learning_rate": 1.8812837556634033e-06, "loss": 0.4492, "step": 11349 }, { "epoch": 1.45, "grad_norm": 0.6149711047935372, "learning_rate": 1.8804775040383894e-06, "loss": 0.4682, "step": 11350 }, { "epoch": 1.45, "grad_norm": 0.6203758570810054, "learning_rate": 1.879671385201478e-06, "loss": 0.4655, "step": 11351 }, { "epoch": 1.45, "grad_norm": 0.5958757086161411, "learning_rate": 1.878865399186985e-06, "loss": 0.4392, "step": 11352 }, { "epoch": 1.45, "grad_norm": 0.7035927179298433, "learning_rate": 1.8780595460292195e-06, "loss": 0.5019, "step": 11353 }, { "epoch": 1.45, "grad_norm": 0.6023035148596293, "learning_rate": 1.8772538257624812e-06, "loss": 0.4745, "step": 11354 }, { "epoch": 1.45, "grad_norm": 0.5122257746860032, "learning_rate": 1.8764482384210703e-06, "loss": 0.4242, "step": 11355 }, { "epoch": 1.45, "grad_norm": 0.6360494899344628, "learning_rate": 1.8756427840392749e-06, "loss": 0.4676, "step": 11356 }, { "epoch": 1.45, "grad_norm": 0.787876292155766, "learning_rate": 1.8748374626513838e-06, "loss": 0.517, "step": 11357 }, { "epoch": 1.45, "grad_norm": 0.7996470719930312, "learning_rate": 1.8740322742916738e-06, "loss": 0.4522, "step": 11358 }, { "epoch": 1.45, "grad_norm": 0.7036255965940895, "learning_rate": 1.8732272189944224e-06, "loss": 0.5299, "step": 11359 }, { "epoch": 1.45, "grad_norm": 0.759481931586932, "learning_rate": 1.8724222967938944e-06, "loss": 0.5199, "step": 11360 }, { "epoch": 1.45, "grad_norm": 0.695578461214734, "learning_rate": 1.871617507724357e-06, "loss": 0.5408, "step": 11361 }, { "epoch": 1.45, "grad_norm": 0.7121033471142931, "learning_rate": 1.8708128518200635e-06, "loss": 0.4968, "step": 11362 }, { "epoch": 1.45, "grad_norm": 0.695666574665368, "learning_rate": 1.8700083291152692e-06, "loss": 0.4235, "step": 11363 }, { "epoch": 1.45, "grad_norm": 0.6810425887666096, "learning_rate": 1.8692039396442169e-06, "loss": 0.5027, "step": 11364 }, { "epoch": 1.45, "grad_norm": 0.8222239671783307, "learning_rate": 1.8683996834411483e-06, "loss": 0.5252, "step": 11365 }, { "epoch": 1.45, "grad_norm": 0.8050408408150342, "learning_rate": 1.8675955605402979e-06, "loss": 0.5058, "step": 11366 }, { "epoch": 1.45, "grad_norm": 0.8433880324569696, "learning_rate": 1.866791570975896e-06, "loss": 0.5451, "step": 11367 }, { "epoch": 1.45, "grad_norm": 0.8072636368134379, "learning_rate": 1.8659877147821637e-06, "loss": 0.5163, "step": 11368 }, { "epoch": 1.45, "grad_norm": 0.6638410952524344, "learning_rate": 1.8651839919933206e-06, "loss": 0.544, "step": 11369 }, { "epoch": 1.45, "grad_norm": 0.7098494588444912, "learning_rate": 1.8643804026435758e-06, "loss": 0.528, "step": 11370 }, { "epoch": 1.45, "grad_norm": 0.5987182590874014, "learning_rate": 1.8635769467671394e-06, "loss": 0.4282, "step": 11371 }, { "epoch": 1.45, "grad_norm": 0.6676643531043296, "learning_rate": 1.862773624398208e-06, "loss": 0.4383, "step": 11372 }, { "epoch": 1.45, "grad_norm": 1.8879658039018834, "learning_rate": 1.8619704355709795e-06, "loss": 0.5059, "step": 11373 }, { "epoch": 1.45, "grad_norm": 0.8757993116504977, "learning_rate": 1.8611673803196407e-06, "loss": 0.5329, "step": 11374 }, { "epoch": 1.45, "grad_norm": 0.8558627979305645, "learning_rate": 1.8603644586783775e-06, "loss": 0.5098, "step": 11375 }, { "epoch": 1.45, "grad_norm": 0.6379557817780248, "learning_rate": 1.8595616706813652e-06, "loss": 0.4782, "step": 11376 }, { "epoch": 1.45, "grad_norm": 0.6029648904436841, "learning_rate": 1.8587590163627773e-06, "loss": 0.4839, "step": 11377 }, { "epoch": 1.45, "grad_norm": 0.805259381730763, "learning_rate": 1.8579564957567791e-06, "loss": 0.5867, "step": 11378 }, { "epoch": 1.45, "grad_norm": 0.7387227870112327, "learning_rate": 1.8571541088975347e-06, "loss": 0.5359, "step": 11379 }, { "epoch": 1.45, "grad_norm": 0.6835881395539167, "learning_rate": 1.8563518558191957e-06, "loss": 0.4796, "step": 11380 }, { "epoch": 1.45, "grad_norm": 0.7993258912990056, "learning_rate": 1.8555497365559133e-06, "loss": 0.5667, "step": 11381 }, { "epoch": 1.45, "grad_norm": 0.6453699321625503, "learning_rate": 1.8547477511418288e-06, "loss": 0.422, "step": 11382 }, { "epoch": 1.45, "grad_norm": 0.6184484823844142, "learning_rate": 1.8539458996110838e-06, "loss": 0.4446, "step": 11383 }, { "epoch": 1.45, "grad_norm": 0.7343103680563909, "learning_rate": 1.8531441819978069e-06, "loss": 0.5102, "step": 11384 }, { "epoch": 1.45, "grad_norm": 0.5681145579544644, "learning_rate": 1.8523425983361276e-06, "loss": 0.438, "step": 11385 }, { "epoch": 1.45, "grad_norm": 0.6409367061892022, "learning_rate": 1.8515411486601637e-06, "loss": 0.5028, "step": 11386 }, { "epoch": 1.45, "grad_norm": 0.7225861684427305, "learning_rate": 1.8507398330040343e-06, "loss": 0.4309, "step": 11387 }, { "epoch": 1.45, "grad_norm": 0.6597519877532663, "learning_rate": 1.849938651401844e-06, "loss": 0.4718, "step": 11388 }, { "epoch": 1.45, "grad_norm": 0.6714276098028268, "learning_rate": 1.849137603887702e-06, "loss": 0.513, "step": 11389 }, { "epoch": 1.45, "grad_norm": 0.5961933529361594, "learning_rate": 1.848336690495699e-06, "loss": 0.4208, "step": 11390 }, { "epoch": 1.45, "grad_norm": 0.6045272297501535, "learning_rate": 1.8475359112599362e-06, "loss": 0.3906, "step": 11391 }, { "epoch": 1.45, "grad_norm": 0.8572888042345367, "learning_rate": 1.8467352662144934e-06, "loss": 0.4952, "step": 11392 }, { "epoch": 1.45, "grad_norm": 0.6472699547003887, "learning_rate": 1.8459347553934559e-06, "loss": 0.4775, "step": 11393 }, { "epoch": 1.45, "grad_norm": 0.6696770115024709, "learning_rate": 1.8451343788308952e-06, "loss": 0.5135, "step": 11394 }, { "epoch": 1.45, "grad_norm": 0.8168211517611075, "learning_rate": 1.844334136560884e-06, "loss": 0.5001, "step": 11395 }, { "epoch": 1.45, "grad_norm": 0.6928336793428703, "learning_rate": 1.843534028617483e-06, "loss": 0.4166, "step": 11396 }, { "epoch": 1.45, "grad_norm": 0.6208105925383309, "learning_rate": 1.8427340550347533e-06, "loss": 0.4772, "step": 11397 }, { "epoch": 1.45, "grad_norm": 0.6133442560191401, "learning_rate": 1.8419342158467441e-06, "loss": 0.4637, "step": 11398 }, { "epoch": 1.45, "grad_norm": 0.5850717184361476, "learning_rate": 1.8411345110875057e-06, "loss": 0.4364, "step": 11399 }, { "epoch": 1.45, "grad_norm": 0.6289716505473774, "learning_rate": 1.8403349407910742e-06, "loss": 0.4291, "step": 11400 }, { "epoch": 1.45, "grad_norm": 0.6269852948282604, "learning_rate": 1.8395355049914897e-06, "loss": 0.435, "step": 11401 }, { "epoch": 1.45, "grad_norm": 0.5992514811941154, "learning_rate": 1.8387362037227779e-06, "loss": 0.4228, "step": 11402 }, { "epoch": 1.45, "grad_norm": 0.6669948651458143, "learning_rate": 1.8379370370189636e-06, "loss": 0.4457, "step": 11403 }, { "epoch": 1.45, "grad_norm": 0.5847273644145587, "learning_rate": 1.8371380049140675e-06, "loss": 0.458, "step": 11404 }, { "epoch": 1.45, "grad_norm": 0.7427856648280324, "learning_rate": 1.8363391074420972e-06, "loss": 0.5212, "step": 11405 }, { "epoch": 1.45, "grad_norm": 0.8804268564421519, "learning_rate": 1.8355403446370612e-06, "loss": 0.5632, "step": 11406 }, { "epoch": 1.45, "grad_norm": 0.7269331058491464, "learning_rate": 1.8347417165329628e-06, "loss": 0.5342, "step": 11407 }, { "epoch": 1.45, "grad_norm": 0.7447227093176003, "learning_rate": 1.8339432231637928e-06, "loss": 0.4994, "step": 11408 }, { "epoch": 1.45, "grad_norm": 0.6879438327484899, "learning_rate": 1.8331448645635441e-06, "loss": 0.4818, "step": 11409 }, { "epoch": 1.45, "grad_norm": 0.6820188400244761, "learning_rate": 1.8323466407661972e-06, "loss": 0.4584, "step": 11410 }, { "epoch": 1.45, "grad_norm": 0.790304865458631, "learning_rate": 1.8315485518057335e-06, "loss": 0.4753, "step": 11411 }, { "epoch": 1.45, "grad_norm": 0.8620632386124969, "learning_rate": 1.8307505977161206e-06, "loss": 0.4327, "step": 11412 }, { "epoch": 1.45, "grad_norm": 0.814672743559231, "learning_rate": 1.8299527785313292e-06, "loss": 0.5779, "step": 11413 }, { "epoch": 1.45, "grad_norm": 0.7305306394057026, "learning_rate": 1.829155094285317e-06, "loss": 0.5228, "step": 11414 }, { "epoch": 1.45, "grad_norm": 0.6052248339321922, "learning_rate": 1.8283575450120389e-06, "loss": 0.4842, "step": 11415 }, { "epoch": 1.45, "grad_norm": 0.7594348748008597, "learning_rate": 1.827560130745447e-06, "loss": 0.566, "step": 11416 }, { "epoch": 1.45, "grad_norm": 0.7507693321932712, "learning_rate": 1.8267628515194813e-06, "loss": 0.5104, "step": 11417 }, { "epoch": 1.45, "grad_norm": 0.7326304713664651, "learning_rate": 1.8259657073680826e-06, "loss": 0.5434, "step": 11418 }, { "epoch": 1.45, "grad_norm": 0.6851379089020471, "learning_rate": 1.8251686983251788e-06, "loss": 0.5361, "step": 11419 }, { "epoch": 1.45, "grad_norm": 0.7546095013146231, "learning_rate": 1.8243718244247e-06, "loss": 0.5257, "step": 11420 }, { "epoch": 1.45, "grad_norm": 0.7441539723313164, "learning_rate": 1.8235750857005629e-06, "loss": 0.5138, "step": 11421 }, { "epoch": 1.46, "grad_norm": 0.6026725448776842, "learning_rate": 1.822778482186686e-06, "loss": 0.4859, "step": 11422 }, { "epoch": 1.46, "grad_norm": 0.752596168333072, "learning_rate": 1.8219820139169741e-06, "loss": 0.5233, "step": 11423 }, { "epoch": 1.46, "grad_norm": 0.6210477534583401, "learning_rate": 1.8211856809253348e-06, "loss": 0.4774, "step": 11424 }, { "epoch": 1.46, "grad_norm": 0.7279576260642828, "learning_rate": 1.820389483245661e-06, "loss": 0.4624, "step": 11425 }, { "epoch": 1.46, "grad_norm": 0.6632562080822728, "learning_rate": 1.8195934209118477e-06, "loss": 0.4616, "step": 11426 }, { "epoch": 1.46, "grad_norm": 0.6595600738049575, "learning_rate": 1.8187974939577763e-06, "loss": 0.4518, "step": 11427 }, { "epoch": 1.46, "grad_norm": 0.6129363296267166, "learning_rate": 1.818001702417333e-06, "loss": 0.4411, "step": 11428 }, { "epoch": 1.46, "grad_norm": 0.5890918786735774, "learning_rate": 1.8172060463243874e-06, "loss": 0.445, "step": 11429 }, { "epoch": 1.46, "grad_norm": 0.7662057066416829, "learning_rate": 1.816410525712811e-06, "loss": 0.5797, "step": 11430 }, { "epoch": 1.46, "grad_norm": 0.7353500142376335, "learning_rate": 1.8156151406164645e-06, "loss": 0.5474, "step": 11431 }, { "epoch": 1.46, "grad_norm": 0.7961389928726098, "learning_rate": 1.8148198910692066e-06, "loss": 0.5096, "step": 11432 }, { "epoch": 1.46, "grad_norm": 0.8054147583020271, "learning_rate": 1.8140247771048863e-06, "loss": 0.5398, "step": 11433 }, { "epoch": 1.46, "grad_norm": 0.6711575202310909, "learning_rate": 1.8132297987573516e-06, "loss": 0.4428, "step": 11434 }, { "epoch": 1.46, "grad_norm": 0.5596974355278252, "learning_rate": 1.8124349560604394e-06, "loss": 0.4393, "step": 11435 }, { "epoch": 1.46, "grad_norm": 0.6128956121929855, "learning_rate": 1.8116402490479867e-06, "loss": 0.427, "step": 11436 }, { "epoch": 1.46, "grad_norm": 0.6819978671117611, "learning_rate": 1.8108456777538185e-06, "loss": 0.4981, "step": 11437 }, { "epoch": 1.46, "grad_norm": 0.7662783071339638, "learning_rate": 1.8100512422117606e-06, "loss": 0.5706, "step": 11438 }, { "epoch": 1.46, "grad_norm": 0.685410470491554, "learning_rate": 1.809256942455624e-06, "loss": 0.5081, "step": 11439 }, { "epoch": 1.46, "grad_norm": 0.6449733075945355, "learning_rate": 1.8084627785192265e-06, "loss": 0.4794, "step": 11440 }, { "epoch": 1.46, "grad_norm": 0.7363273460430979, "learning_rate": 1.8076687504363683e-06, "loss": 0.5689, "step": 11441 }, { "epoch": 1.46, "grad_norm": 0.8143814366111622, "learning_rate": 1.8068748582408518e-06, "loss": 0.4884, "step": 11442 }, { "epoch": 1.46, "grad_norm": 0.615905581501184, "learning_rate": 1.806081101966467e-06, "loss": 0.4912, "step": 11443 }, { "epoch": 1.46, "grad_norm": 0.6070018823252646, "learning_rate": 1.8052874816470056e-06, "loss": 0.5057, "step": 11444 }, { "epoch": 1.46, "grad_norm": 0.7045085296061119, "learning_rate": 1.804493997316245e-06, "loss": 0.5331, "step": 11445 }, { "epoch": 1.46, "grad_norm": 0.6514146328919574, "learning_rate": 1.8037006490079656e-06, "loss": 0.4335, "step": 11446 }, { "epoch": 1.46, "grad_norm": 0.7368975197273516, "learning_rate": 1.8029074367559335e-06, "loss": 0.4163, "step": 11447 }, { "epoch": 1.46, "grad_norm": 0.6413597586439923, "learning_rate": 1.8021143605939173e-06, "loss": 0.4392, "step": 11448 }, { "epoch": 1.46, "grad_norm": 0.6562755475241322, "learning_rate": 1.8013214205556722e-06, "loss": 0.4934, "step": 11449 }, { "epoch": 1.46, "grad_norm": 0.5744958712650743, "learning_rate": 1.8005286166749548e-06, "loss": 0.4319, "step": 11450 }, { "epoch": 1.46, "grad_norm": 0.5898944792749423, "learning_rate": 1.7997359489855083e-06, "loss": 0.4334, "step": 11451 }, { "epoch": 1.46, "grad_norm": 0.5474595776835868, "learning_rate": 1.798943417521075e-06, "loss": 0.4424, "step": 11452 }, { "epoch": 1.46, "grad_norm": 0.7212489705428717, "learning_rate": 1.7981510223153919e-06, "loss": 0.4679, "step": 11453 }, { "epoch": 1.46, "grad_norm": 0.6659344670590249, "learning_rate": 1.79735876340219e-06, "loss": 0.4797, "step": 11454 }, { "epoch": 1.46, "grad_norm": 0.5789524318917341, "learning_rate": 1.7965666408151894e-06, "loss": 0.4792, "step": 11455 }, { "epoch": 1.46, "grad_norm": 0.7589410901965923, "learning_rate": 1.7957746545881121e-06, "loss": 0.5447, "step": 11456 }, { "epoch": 1.46, "grad_norm": 1.0046614548571045, "learning_rate": 1.7949828047546668e-06, "loss": 0.5296, "step": 11457 }, { "epoch": 1.46, "grad_norm": 0.7933823037260812, "learning_rate": 1.7941910913485633e-06, "loss": 0.5279, "step": 11458 }, { "epoch": 1.46, "grad_norm": 0.7876437667701834, "learning_rate": 1.7933995144034994e-06, "loss": 0.4971, "step": 11459 }, { "epoch": 1.46, "grad_norm": 0.5719763621423096, "learning_rate": 1.7926080739531727e-06, "loss": 0.4383, "step": 11460 }, { "epoch": 1.46, "grad_norm": 0.6202015258602679, "learning_rate": 1.7918167700312695e-06, "loss": 0.4718, "step": 11461 }, { "epoch": 1.46, "grad_norm": 0.5853265472101226, "learning_rate": 1.7910256026714756e-06, "loss": 0.5024, "step": 11462 }, { "epoch": 1.46, "grad_norm": 0.797943078973722, "learning_rate": 1.790234571907466e-06, "loss": 0.5613, "step": 11463 }, { "epoch": 1.46, "grad_norm": 0.831896870677325, "learning_rate": 1.789443677772914e-06, "loss": 0.5199, "step": 11464 }, { "epoch": 1.46, "grad_norm": 0.607534823891539, "learning_rate": 1.7886529203014864e-06, "loss": 0.4062, "step": 11465 }, { "epoch": 1.46, "grad_norm": 0.6825610866584609, "learning_rate": 1.7878622995268407e-06, "loss": 0.5457, "step": 11466 }, { "epoch": 1.46, "grad_norm": 0.7460391223936172, "learning_rate": 1.7870718154826338e-06, "loss": 0.54, "step": 11467 }, { "epoch": 1.46, "grad_norm": 0.8683466754217286, "learning_rate": 1.7862814682025115e-06, "loss": 0.4967, "step": 11468 }, { "epoch": 1.46, "grad_norm": 0.8502756211600191, "learning_rate": 1.7854912577201167e-06, "loss": 0.5297, "step": 11469 }, { "epoch": 1.46, "grad_norm": 0.7780664606821395, "learning_rate": 1.7847011840690892e-06, "loss": 0.5598, "step": 11470 }, { "epoch": 1.46, "grad_norm": 0.7337613261122693, "learning_rate": 1.7839112472830562e-06, "loss": 0.4994, "step": 11471 }, { "epoch": 1.46, "grad_norm": 0.8168542737263976, "learning_rate": 1.7831214473956454e-06, "loss": 0.5212, "step": 11472 }, { "epoch": 1.46, "grad_norm": 0.8352738925400326, "learning_rate": 1.782331784440473e-06, "loss": 0.5519, "step": 11473 }, { "epoch": 1.46, "grad_norm": 0.8352580204131053, "learning_rate": 1.7815422584511566e-06, "loss": 0.5121, "step": 11474 }, { "epoch": 1.46, "grad_norm": 0.7001900334173825, "learning_rate": 1.7807528694612996e-06, "loss": 0.4768, "step": 11475 }, { "epoch": 1.46, "grad_norm": 0.8445488655757379, "learning_rate": 1.779963617504506e-06, "loss": 0.5117, "step": 11476 }, { "epoch": 1.46, "grad_norm": 0.7406429898031691, "learning_rate": 1.779174502614373e-06, "loss": 0.498, "step": 11477 }, { "epoch": 1.46, "grad_norm": 0.7893810111282417, "learning_rate": 1.7783855248244875e-06, "loss": 0.5518, "step": 11478 }, { "epoch": 1.46, "grad_norm": 0.7855546277686893, "learning_rate": 1.7775966841684367e-06, "loss": 0.4456, "step": 11479 }, { "epoch": 1.46, "grad_norm": 0.6855011200289312, "learning_rate": 1.7768079806797967e-06, "loss": 0.4914, "step": 11480 }, { "epoch": 1.46, "grad_norm": 0.6938643355402148, "learning_rate": 1.7760194143921423e-06, "loss": 0.4794, "step": 11481 }, { "epoch": 1.46, "grad_norm": 0.6574024281054431, "learning_rate": 1.7752309853390377e-06, "loss": 0.428, "step": 11482 }, { "epoch": 1.46, "grad_norm": 0.5673485561577188, "learning_rate": 1.7744426935540464e-06, "loss": 0.4476, "step": 11483 }, { "epoch": 1.46, "grad_norm": 0.6271509276827958, "learning_rate": 1.7736545390707205e-06, "loss": 0.5151, "step": 11484 }, { "epoch": 1.46, "grad_norm": 0.6918206223414506, "learning_rate": 1.7728665219226133e-06, "loss": 0.5171, "step": 11485 }, { "epoch": 1.46, "grad_norm": 0.6263608028548525, "learning_rate": 1.7720786421432635e-06, "loss": 0.5046, "step": 11486 }, { "epoch": 1.46, "grad_norm": 0.6465377676486046, "learning_rate": 1.771290899766213e-06, "loss": 0.4291, "step": 11487 }, { "epoch": 1.46, "grad_norm": 0.6907595096503566, "learning_rate": 1.7705032948249895e-06, "loss": 0.4566, "step": 11488 }, { "epoch": 1.46, "grad_norm": 0.7075940045692216, "learning_rate": 1.7697158273531207e-06, "loss": 0.5063, "step": 11489 }, { "epoch": 1.46, "grad_norm": 0.6813332020786361, "learning_rate": 1.768928497384127e-06, "loss": 0.5052, "step": 11490 }, { "epoch": 1.46, "grad_norm": 0.7446828431298997, "learning_rate": 1.7681413049515233e-06, "loss": 0.4687, "step": 11491 }, { "epoch": 1.46, "grad_norm": 0.5743777001582568, "learning_rate": 1.7673542500888157e-06, "loss": 0.4542, "step": 11492 }, { "epoch": 1.46, "grad_norm": 0.797217248313285, "learning_rate": 1.7665673328295085e-06, "loss": 0.5179, "step": 11493 }, { "epoch": 1.46, "grad_norm": 0.760027149228795, "learning_rate": 1.7657805532070966e-06, "loss": 0.485, "step": 11494 }, { "epoch": 1.46, "grad_norm": 0.7939131495353275, "learning_rate": 1.7649939112550724e-06, "loss": 0.5341, "step": 11495 }, { "epoch": 1.46, "grad_norm": 0.7116622065175608, "learning_rate": 1.7642074070069182e-06, "loss": 0.4707, "step": 11496 }, { "epoch": 1.46, "grad_norm": 0.5997526212237774, "learning_rate": 1.7634210404961165e-06, "loss": 0.4579, "step": 11497 }, { "epoch": 1.46, "grad_norm": 0.6592923954663346, "learning_rate": 1.762634811756137e-06, "loss": 0.4747, "step": 11498 }, { "epoch": 1.46, "grad_norm": 0.5720989469457686, "learning_rate": 1.7618487208204498e-06, "loss": 0.4382, "step": 11499 }, { "epoch": 1.47, "grad_norm": 0.5637432305131495, "learning_rate": 1.7610627677225129e-06, "loss": 0.4027, "step": 11500 }, { "epoch": 1.47, "grad_norm": 0.6003834310857484, "learning_rate": 1.7602769524957842e-06, "loss": 0.4462, "step": 11501 }, { "epoch": 1.47, "grad_norm": 0.678516916390727, "learning_rate": 1.7594912751737132e-06, "loss": 0.4399, "step": 11502 }, { "epoch": 1.47, "grad_norm": 0.609378781142807, "learning_rate": 1.7587057357897447e-06, "loss": 0.44, "step": 11503 }, { "epoch": 1.47, "grad_norm": 0.7350987040078207, "learning_rate": 1.7579203343773138e-06, "loss": 0.4426, "step": 11504 }, { "epoch": 1.47, "grad_norm": 0.690981948533854, "learning_rate": 1.757135070969856e-06, "loss": 0.494, "step": 11505 }, { "epoch": 1.47, "grad_norm": 0.6602228679499301, "learning_rate": 1.756349945600793e-06, "loss": 0.464, "step": 11506 }, { "epoch": 1.47, "grad_norm": 0.6912428253486794, "learning_rate": 1.75556495830355e-06, "loss": 0.4572, "step": 11507 }, { "epoch": 1.47, "grad_norm": 0.6757033083216948, "learning_rate": 1.7547801091115374e-06, "loss": 0.4481, "step": 11508 }, { "epoch": 1.47, "grad_norm": 0.6004194983371172, "learning_rate": 1.7539953980581665e-06, "loss": 0.411, "step": 11509 }, { "epoch": 1.47, "grad_norm": 0.6629981984011224, "learning_rate": 1.753210825176837e-06, "loss": 0.4508, "step": 11510 }, { "epoch": 1.47, "grad_norm": 0.5526800456809966, "learning_rate": 1.7524263905009499e-06, "loss": 0.4468, "step": 11511 }, { "epoch": 1.47, "grad_norm": 0.5990447698490425, "learning_rate": 1.7516420940638912e-06, "loss": 0.4835, "step": 11512 }, { "epoch": 1.47, "grad_norm": 0.6169888985794139, "learning_rate": 1.7508579358990501e-06, "loss": 0.4477, "step": 11513 }, { "epoch": 1.47, "grad_norm": 0.5892362134564003, "learning_rate": 1.7500739160398006e-06, "loss": 0.4263, "step": 11514 }, { "epoch": 1.47, "grad_norm": 0.5534155623237008, "learning_rate": 1.749290034519523e-06, "loss": 0.4296, "step": 11515 }, { "epoch": 1.47, "grad_norm": 0.701035013101386, "learning_rate": 1.7485062913715784e-06, "loss": 0.5216, "step": 11516 }, { "epoch": 1.47, "grad_norm": 0.7680250293511391, "learning_rate": 1.747722686629333e-06, "loss": 0.5052, "step": 11517 }, { "epoch": 1.47, "grad_norm": 0.7417850421576466, "learning_rate": 1.746939220326138e-06, "loss": 0.555, "step": 11518 }, { "epoch": 1.47, "grad_norm": 0.7258841808079408, "learning_rate": 1.746155892495347e-06, "loss": 0.5377, "step": 11519 }, { "epoch": 1.47, "grad_norm": 0.7553149431842945, "learning_rate": 1.7453727031703e-06, "loss": 0.5544, "step": 11520 }, { "epoch": 1.47, "grad_norm": 0.8202925738083765, "learning_rate": 1.7445896523843386e-06, "loss": 0.5436, "step": 11521 }, { "epoch": 1.47, "grad_norm": 0.7932840073403957, "learning_rate": 1.7438067401707909e-06, "loss": 0.5222, "step": 11522 }, { "epoch": 1.47, "grad_norm": 0.6139123674514075, "learning_rate": 1.7430239665629866e-06, "loss": 0.5099, "step": 11523 }, { "epoch": 1.47, "grad_norm": 1.3698117525881095, "learning_rate": 1.7422413315942433e-06, "loss": 0.5158, "step": 11524 }, { "epoch": 1.47, "grad_norm": 0.604257915599999, "learning_rate": 1.7414588352978774e-06, "loss": 0.4342, "step": 11525 }, { "epoch": 1.47, "grad_norm": 0.7645491208958822, "learning_rate": 1.7406764777071938e-06, "loss": 0.4932, "step": 11526 }, { "epoch": 1.47, "grad_norm": 0.6857563844302612, "learning_rate": 1.739894258855498e-06, "loss": 0.5499, "step": 11527 }, { "epoch": 1.47, "grad_norm": 0.5865634869951317, "learning_rate": 1.7391121787760873e-06, "loss": 0.4653, "step": 11528 }, { "epoch": 1.47, "grad_norm": 0.6614811942942823, "learning_rate": 1.7383302375022493e-06, "loss": 0.5516, "step": 11529 }, { "epoch": 1.47, "grad_norm": 0.7539716103220729, "learning_rate": 1.73754843506727e-06, "loss": 0.4831, "step": 11530 }, { "epoch": 1.47, "grad_norm": 0.5865375857353755, "learning_rate": 1.73676677150443e-06, "loss": 0.4301, "step": 11531 }, { "epoch": 1.47, "grad_norm": 0.6760179459749036, "learning_rate": 1.7359852468469995e-06, "loss": 0.4368, "step": 11532 }, { "epoch": 1.47, "grad_norm": 0.7220857390748701, "learning_rate": 1.7352038611282485e-06, "loss": 0.5184, "step": 11533 }, { "epoch": 1.47, "grad_norm": 0.7569950375840301, "learning_rate": 1.7344226143814341e-06, "loss": 0.506, "step": 11534 }, { "epoch": 1.47, "grad_norm": 0.5477283439534922, "learning_rate": 1.733641506639816e-06, "loss": 0.4, "step": 11535 }, { "epoch": 1.47, "grad_norm": 0.640102976398902, "learning_rate": 1.7328605379366391e-06, "loss": 0.4001, "step": 11536 }, { "epoch": 1.47, "grad_norm": 0.5938063790849741, "learning_rate": 1.7320797083051506e-06, "loss": 0.4166, "step": 11537 }, { "epoch": 1.47, "grad_norm": 0.6173801546358957, "learning_rate": 1.7312990177785848e-06, "loss": 0.4189, "step": 11538 }, { "epoch": 1.47, "grad_norm": 0.5318305032781782, "learning_rate": 1.7305184663901741e-06, "loss": 0.4228, "step": 11539 }, { "epoch": 1.47, "grad_norm": 0.7169237283214026, "learning_rate": 1.7297380541731461e-06, "loss": 0.4193, "step": 11540 }, { "epoch": 1.47, "grad_norm": 0.5883173176085361, "learning_rate": 1.7289577811607179e-06, "loss": 0.4777, "step": 11541 }, { "epoch": 1.47, "grad_norm": 0.6366062492789052, "learning_rate": 1.728177647386105e-06, "loss": 0.4791, "step": 11542 }, { "epoch": 1.47, "grad_norm": 0.6983595428373511, "learning_rate": 1.7273976528825132e-06, "loss": 0.5244, "step": 11543 }, { "epoch": 1.47, "grad_norm": 0.7089669432113576, "learning_rate": 1.726617797683147e-06, "loss": 0.5168, "step": 11544 }, { "epoch": 1.47, "grad_norm": 0.7120056115174487, "learning_rate": 1.7258380818211995e-06, "loss": 0.5295, "step": 11545 }, { "epoch": 1.47, "grad_norm": 0.7434373498383949, "learning_rate": 1.725058505329864e-06, "loss": 0.483, "step": 11546 }, { "epoch": 1.47, "grad_norm": 0.6767554191941588, "learning_rate": 1.7242790682423205e-06, "loss": 0.5294, "step": 11547 }, { "epoch": 1.47, "grad_norm": 0.7655589063541344, "learning_rate": 1.723499770591751e-06, "loss": 0.5271, "step": 11548 }, { "epoch": 1.47, "grad_norm": 0.6070402806373606, "learning_rate": 1.722720612411325e-06, "loss": 0.5066, "step": 11549 }, { "epoch": 1.47, "grad_norm": 0.706250517347182, "learning_rate": 1.7219415937342115e-06, "loss": 0.5281, "step": 11550 }, { "epoch": 1.47, "grad_norm": 0.6880917583980143, "learning_rate": 1.7211627145935655e-06, "loss": 0.4951, "step": 11551 }, { "epoch": 1.47, "grad_norm": 0.6322374933541931, "learning_rate": 1.720383975022548e-06, "loss": 0.4784, "step": 11552 }, { "epoch": 1.47, "grad_norm": 0.673863985497878, "learning_rate": 1.7196053750543034e-06, "loss": 0.4706, "step": 11553 }, { "epoch": 1.47, "grad_norm": 0.7028437483328559, "learning_rate": 1.7188269147219772e-06, "loss": 0.5012, "step": 11554 }, { "epoch": 1.47, "grad_norm": 0.6344949671979806, "learning_rate": 1.7180485940587021e-06, "loss": 0.4543, "step": 11555 }, { "epoch": 1.47, "grad_norm": 0.6669204747579709, "learning_rate": 1.7172704130976125e-06, "loss": 0.4921, "step": 11556 }, { "epoch": 1.47, "grad_norm": 0.7643036261885616, "learning_rate": 1.7164923718718296e-06, "loss": 0.5387, "step": 11557 }, { "epoch": 1.47, "grad_norm": 0.6034249820064262, "learning_rate": 1.7157144704144758e-06, "loss": 0.4326, "step": 11558 }, { "epoch": 1.47, "grad_norm": 0.835448185065465, "learning_rate": 1.71493670875866e-06, "loss": 0.4862, "step": 11559 }, { "epoch": 1.47, "grad_norm": 0.6776718149410594, "learning_rate": 1.7141590869374925e-06, "loss": 0.475, "step": 11560 }, { "epoch": 1.47, "grad_norm": 0.6357531812001118, "learning_rate": 1.7133816049840708e-06, "loss": 0.4488, "step": 11561 }, { "epoch": 1.47, "grad_norm": 0.6449287159431228, "learning_rate": 1.7126042629314937e-06, "loss": 0.4583, "step": 11562 }, { "epoch": 1.47, "grad_norm": 0.650441838256173, "learning_rate": 1.7118270608128446e-06, "loss": 0.4691, "step": 11563 }, { "epoch": 1.47, "grad_norm": 0.5524188236511227, "learning_rate": 1.7110499986612133e-06, "loss": 0.4289, "step": 11564 }, { "epoch": 1.47, "grad_norm": 0.600231352496326, "learning_rate": 1.710273076509672e-06, "loss": 0.4067, "step": 11565 }, { "epoch": 1.47, "grad_norm": 0.6465559373369545, "learning_rate": 1.7094962943912951e-06, "loss": 0.4678, "step": 11566 }, { "epoch": 1.47, "grad_norm": 0.5671943370014317, "learning_rate": 1.7087196523391441e-06, "loss": 0.4676, "step": 11567 }, { "epoch": 1.47, "grad_norm": 0.6431900325287857, "learning_rate": 1.707943150386282e-06, "loss": 0.458, "step": 11568 }, { "epoch": 1.47, "grad_norm": 0.8405237219699712, "learning_rate": 1.7071667885657585e-06, "loss": 0.514, "step": 11569 }, { "epoch": 1.47, "grad_norm": 0.6587106144242421, "learning_rate": 1.706390566910624e-06, "loss": 0.4035, "step": 11570 }, { "epoch": 1.47, "grad_norm": 0.6780986998984287, "learning_rate": 1.7056144854539164e-06, "loss": 0.4827, "step": 11571 }, { "epoch": 1.47, "grad_norm": 0.5870662835236904, "learning_rate": 1.7048385442286741e-06, "loss": 0.4463, "step": 11572 }, { "epoch": 1.47, "grad_norm": 0.645092100606818, "learning_rate": 1.7040627432679241e-06, "loss": 0.4913, "step": 11573 }, { "epoch": 1.47, "grad_norm": 0.7125845990609546, "learning_rate": 1.7032870826046922e-06, "loss": 0.5073, "step": 11574 }, { "epoch": 1.47, "grad_norm": 0.853295875398317, "learning_rate": 1.7025115622719929e-06, "loss": 0.4943, "step": 11575 }, { "epoch": 1.47, "grad_norm": 0.7416814189030903, "learning_rate": 1.701736182302839e-06, "loss": 0.6, "step": 11576 }, { "epoch": 1.47, "grad_norm": 0.7131880675678445, "learning_rate": 1.7009609427302359e-06, "loss": 0.5232, "step": 11577 }, { "epoch": 1.47, "grad_norm": 0.8572876704747294, "learning_rate": 1.7001858435871855e-06, "loss": 0.5248, "step": 11578 }, { "epoch": 1.48, "grad_norm": 0.7649702228976412, "learning_rate": 1.699410884906677e-06, "loss": 0.4519, "step": 11579 }, { "epoch": 1.48, "grad_norm": 0.5346343170959683, "learning_rate": 1.6986360667217016e-06, "loss": 0.4721, "step": 11580 }, { "epoch": 1.48, "grad_norm": 0.713307221952802, "learning_rate": 1.6978613890652373e-06, "loss": 0.4939, "step": 11581 }, { "epoch": 1.48, "grad_norm": 0.7546086763042094, "learning_rate": 1.6970868519702638e-06, "loss": 0.5017, "step": 11582 }, { "epoch": 1.48, "grad_norm": 0.6932047398458963, "learning_rate": 1.6963124554697464e-06, "loss": 0.4684, "step": 11583 }, { "epoch": 1.48, "grad_norm": 0.6003799855910703, "learning_rate": 1.6955381995966524e-06, "loss": 0.4724, "step": 11584 }, { "epoch": 1.48, "grad_norm": 0.6089361341662528, "learning_rate": 1.694764084383936e-06, "loss": 0.425, "step": 11585 }, { "epoch": 1.48, "grad_norm": 0.6919833018782288, "learning_rate": 1.6939901098645517e-06, "loss": 0.4615, "step": 11586 }, { "epoch": 1.48, "grad_norm": 0.6286409655726857, "learning_rate": 1.6932162760714433e-06, "loss": 0.5113, "step": 11587 }, { "epoch": 1.48, "grad_norm": 0.7297549599010655, "learning_rate": 1.6924425830375501e-06, "loss": 0.5702, "step": 11588 }, { "epoch": 1.48, "grad_norm": 0.7785206896209446, "learning_rate": 1.691669030795809e-06, "loss": 0.5451, "step": 11589 }, { "epoch": 1.48, "grad_norm": 0.6674820695033553, "learning_rate": 1.6908956193791432e-06, "loss": 0.4699, "step": 11590 }, { "epoch": 1.48, "grad_norm": 0.7202785789230283, "learning_rate": 1.6901223488204781e-06, "loss": 0.5083, "step": 11591 }, { "epoch": 1.48, "grad_norm": 0.6012149512688153, "learning_rate": 1.6893492191527266e-06, "loss": 0.4301, "step": 11592 }, { "epoch": 1.48, "grad_norm": 0.6552170568313688, "learning_rate": 1.6885762304087995e-06, "loss": 0.5229, "step": 11593 }, { "epoch": 1.48, "grad_norm": 0.7565381505453234, "learning_rate": 1.6878033826216016e-06, "loss": 0.4909, "step": 11594 }, { "epoch": 1.48, "grad_norm": 1.6545339847546898, "learning_rate": 1.6870306758240285e-06, "loss": 0.4842, "step": 11595 }, { "epoch": 1.48, "grad_norm": 0.7039311974330968, "learning_rate": 1.686258110048974e-06, "loss": 0.5023, "step": 11596 }, { "epoch": 1.48, "grad_norm": 0.6477100130079324, "learning_rate": 1.6854856853293212e-06, "loss": 0.4484, "step": 11597 }, { "epoch": 1.48, "grad_norm": 0.763500242358217, "learning_rate": 1.6847134016979521e-06, "loss": 0.4686, "step": 11598 }, { "epoch": 1.48, "grad_norm": 1.3907942308871668, "learning_rate": 1.683941259187738e-06, "loss": 0.516, "step": 11599 }, { "epoch": 1.48, "grad_norm": 0.6250893047220832, "learning_rate": 1.6831692578315483e-06, "loss": 0.4873, "step": 11600 }, { "epoch": 1.48, "grad_norm": 0.6227874024729992, "learning_rate": 1.6823973976622455e-06, "loss": 0.533, "step": 11601 }, { "epoch": 1.48, "grad_norm": 0.7012333196607653, "learning_rate": 1.6816256787126822e-06, "loss": 0.5406, "step": 11602 }, { "epoch": 1.48, "grad_norm": 0.7024384664349265, "learning_rate": 1.6808541010157115e-06, "loss": 0.4305, "step": 11603 }, { "epoch": 1.48, "grad_norm": 0.6349009486137714, "learning_rate": 1.680082664604174e-06, "loss": 0.4381, "step": 11604 }, { "epoch": 1.48, "grad_norm": 0.6092294486809704, "learning_rate": 1.6793113695109097e-06, "loss": 0.4124, "step": 11605 }, { "epoch": 1.48, "grad_norm": 0.6150864069303005, "learning_rate": 1.6785402157687474e-06, "loss": 0.4274, "step": 11606 }, { "epoch": 1.48, "grad_norm": 0.6286914796107257, "learning_rate": 1.6777692034105158e-06, "loss": 0.4893, "step": 11607 }, { "epoch": 1.48, "grad_norm": 0.7341163536527543, "learning_rate": 1.6769983324690315e-06, "loss": 0.4637, "step": 11608 }, { "epoch": 1.48, "grad_norm": 0.6791581357361317, "learning_rate": 1.6762276029771113e-06, "loss": 0.5236, "step": 11609 }, { "epoch": 1.48, "grad_norm": 0.9183999407586751, "learning_rate": 1.6754570149675586e-06, "loss": 0.5465, "step": 11610 }, { "epoch": 1.48, "grad_norm": 0.8161592850414057, "learning_rate": 1.6746865684731789e-06, "loss": 0.5096, "step": 11611 }, { "epoch": 1.48, "grad_norm": 0.6323710622298931, "learning_rate": 1.6739162635267643e-06, "loss": 0.4427, "step": 11612 }, { "epoch": 1.48, "grad_norm": 0.6072191275980428, "learning_rate": 1.6731461001611055e-06, "loss": 0.4738, "step": 11613 }, { "epoch": 1.48, "grad_norm": 0.5889060218495732, "learning_rate": 1.672376078408986e-06, "loss": 0.4159, "step": 11614 }, { "epoch": 1.48, "grad_norm": 0.6562959582773006, "learning_rate": 1.6716061983031851e-06, "loss": 0.4423, "step": 11615 }, { "epoch": 1.48, "grad_norm": 0.6160393703774925, "learning_rate": 1.6708364598764703e-06, "loss": 0.5242, "step": 11616 }, { "epoch": 1.48, "grad_norm": 0.6883351946369045, "learning_rate": 1.6700668631616112e-06, "loss": 0.4514, "step": 11617 }, { "epoch": 1.48, "grad_norm": 0.6254235523005447, "learning_rate": 1.6692974081913627e-06, "loss": 0.4591, "step": 11618 }, { "epoch": 1.48, "grad_norm": 0.7676230268042148, "learning_rate": 1.6685280949984823e-06, "loss": 0.451, "step": 11619 }, { "epoch": 1.48, "grad_norm": 0.5594107990452631, "learning_rate": 1.6677589236157126e-06, "loss": 0.4797, "step": 11620 }, { "epoch": 1.48, "grad_norm": 0.6960349098074288, "learning_rate": 1.6669898940757996e-06, "loss": 0.5289, "step": 11621 }, { "epoch": 1.48, "grad_norm": 0.5472261529464447, "learning_rate": 1.6662210064114742e-06, "loss": 0.4429, "step": 11622 }, { "epoch": 1.48, "grad_norm": 0.5724791718624103, "learning_rate": 1.6654522606554684e-06, "loss": 0.4138, "step": 11623 }, { "epoch": 1.48, "grad_norm": 0.7021882781477281, "learning_rate": 1.6646836568405033e-06, "loss": 0.4889, "step": 11624 }, { "epoch": 1.48, "grad_norm": 0.6197098990816787, "learning_rate": 1.6639151949992966e-06, "loss": 0.4532, "step": 11625 }, { "epoch": 1.48, "grad_norm": 0.688513090820751, "learning_rate": 1.6631468751645591e-06, "loss": 0.5016, "step": 11626 }, { "epoch": 1.48, "grad_norm": 0.6051277156718824, "learning_rate": 1.6623786973689981e-06, "loss": 0.4522, "step": 11627 }, { "epoch": 1.48, "grad_norm": 0.5521555833063058, "learning_rate": 1.6616106616453082e-06, "loss": 0.4509, "step": 11628 }, { "epoch": 1.48, "grad_norm": 0.7601612657721504, "learning_rate": 1.6608427680261868e-06, "loss": 0.5949, "step": 11629 }, { "epoch": 1.48, "grad_norm": 0.827222430387003, "learning_rate": 1.6600750165443163e-06, "loss": 0.5038, "step": 11630 }, { "epoch": 1.48, "grad_norm": 0.5697281018012231, "learning_rate": 1.6593074072323812e-06, "loss": 0.4075, "step": 11631 }, { "epoch": 1.48, "grad_norm": 0.5691538339339532, "learning_rate": 1.6585399401230528e-06, "loss": 0.4276, "step": 11632 }, { "epoch": 1.48, "grad_norm": 0.6779596515561276, "learning_rate": 1.657772615249003e-06, "loss": 0.4723, "step": 11633 }, { "epoch": 1.48, "grad_norm": 0.6491495786981641, "learning_rate": 1.6570054326428914e-06, "loss": 0.5221, "step": 11634 }, { "epoch": 1.48, "grad_norm": 0.8610529673071755, "learning_rate": 1.656238392337377e-06, "loss": 0.5459, "step": 11635 }, { "epoch": 1.48, "grad_norm": 0.7328895451009377, "learning_rate": 1.6554714943651078e-06, "loss": 0.4883, "step": 11636 }, { "epoch": 1.48, "grad_norm": 0.6491557838985147, "learning_rate": 1.654704738758731e-06, "loss": 0.4438, "step": 11637 }, { "epoch": 1.48, "grad_norm": 0.660849923400595, "learning_rate": 1.6539381255508802e-06, "loss": 0.4893, "step": 11638 }, { "epoch": 1.48, "grad_norm": 0.7055581571554427, "learning_rate": 1.6531716547741945e-06, "loss": 0.5549, "step": 11639 }, { "epoch": 1.48, "grad_norm": 2.4094232215773728, "learning_rate": 1.6524053264612945e-06, "loss": 0.574, "step": 11640 }, { "epoch": 1.48, "grad_norm": 0.6809876669518666, "learning_rate": 1.651639140644804e-06, "loss": 0.5384, "step": 11641 }, { "epoch": 1.48, "grad_norm": 0.9028688813525833, "learning_rate": 1.6508730973573338e-06, "loss": 0.4999, "step": 11642 }, { "epoch": 1.48, "grad_norm": 0.7819055999206425, "learning_rate": 1.6501071966314952e-06, "loss": 0.4797, "step": 11643 }, { "epoch": 1.48, "grad_norm": 0.7903758568525185, "learning_rate": 1.6493414384998873e-06, "loss": 0.4718, "step": 11644 }, { "epoch": 1.48, "grad_norm": 1.1374231005083308, "learning_rate": 1.6485758229951082e-06, "loss": 0.5196, "step": 11645 }, { "epoch": 1.48, "grad_norm": 0.8436919990140662, "learning_rate": 1.6478103501497455e-06, "loss": 0.5309, "step": 11646 }, { "epoch": 1.48, "grad_norm": 0.762438017786036, "learning_rate": 1.6470450199963856e-06, "loss": 0.4766, "step": 11647 }, { "epoch": 1.48, "grad_norm": 0.780788225908962, "learning_rate": 1.6462798325676033e-06, "loss": 0.5233, "step": 11648 }, { "epoch": 1.48, "grad_norm": 0.6717274026315277, "learning_rate": 1.6455147878959727e-06, "loss": 0.5177, "step": 11649 }, { "epoch": 1.48, "grad_norm": 0.8202416257120945, "learning_rate": 1.644749886014057e-06, "loss": 0.5928, "step": 11650 }, { "epoch": 1.48, "grad_norm": 0.671860262074767, "learning_rate": 1.6439851269544165e-06, "loss": 0.4137, "step": 11651 }, { "epoch": 1.48, "grad_norm": 0.62919415610537, "learning_rate": 1.6432205107496069e-06, "loss": 0.4568, "step": 11652 }, { "epoch": 1.48, "grad_norm": 0.6233772364002257, "learning_rate": 1.6424560374321713e-06, "loss": 0.4446, "step": 11653 }, { "epoch": 1.48, "grad_norm": 0.6564702793842595, "learning_rate": 1.6416917070346534e-06, "loss": 0.5211, "step": 11654 }, { "epoch": 1.48, "grad_norm": 0.756661432533553, "learning_rate": 1.6409275195895892e-06, "loss": 0.5062, "step": 11655 }, { "epoch": 1.48, "grad_norm": 0.6899665087428536, "learning_rate": 1.6401634751295047e-06, "loss": 0.5137, "step": 11656 }, { "epoch": 1.49, "grad_norm": 0.7044619148972615, "learning_rate": 1.6393995736869267e-06, "loss": 0.477, "step": 11657 }, { "epoch": 1.49, "grad_norm": 0.6246292546745654, "learning_rate": 1.6386358152943682e-06, "loss": 0.4564, "step": 11658 }, { "epoch": 1.49, "grad_norm": 0.5545051837420987, "learning_rate": 1.6378721999843428e-06, "loss": 0.4251, "step": 11659 }, { "epoch": 1.49, "grad_norm": 0.5675673493529451, "learning_rate": 1.6371087277893527e-06, "loss": 0.4294, "step": 11660 }, { "epoch": 1.49, "grad_norm": 0.6752120206646379, "learning_rate": 1.6363453987418997e-06, "loss": 0.487, "step": 11661 }, { "epoch": 1.49, "grad_norm": 0.7313314388976673, "learning_rate": 1.6355822128744725e-06, "loss": 0.546, "step": 11662 }, { "epoch": 1.49, "grad_norm": 0.7567602038825845, "learning_rate": 1.6348191702195593e-06, "loss": 0.5424, "step": 11663 }, { "epoch": 1.49, "grad_norm": 0.7669195468100269, "learning_rate": 1.6340562708096419e-06, "loss": 0.5364, "step": 11664 }, { "epoch": 1.49, "grad_norm": 0.7715835948915516, "learning_rate": 1.6332935146771917e-06, "loss": 0.5629, "step": 11665 }, { "epoch": 1.49, "grad_norm": 0.6390565392200357, "learning_rate": 1.6325309018546798e-06, "loss": 0.458, "step": 11666 }, { "epoch": 1.49, "grad_norm": 0.7582914358538736, "learning_rate": 1.6317684323745648e-06, "loss": 0.4995, "step": 11667 }, { "epoch": 1.49, "grad_norm": 0.5913005676358329, "learning_rate": 1.6310061062693061e-06, "loss": 0.4314, "step": 11668 }, { "epoch": 1.49, "grad_norm": 0.6120843872338068, "learning_rate": 1.6302439235713496e-06, "loss": 0.4709, "step": 11669 }, { "epoch": 1.49, "grad_norm": 0.7248412225181978, "learning_rate": 1.6294818843131432e-06, "loss": 0.4963, "step": 11670 }, { "epoch": 1.49, "grad_norm": 0.6743556696793921, "learning_rate": 1.6287199885271205e-06, "loss": 0.4737, "step": 11671 }, { "epoch": 1.49, "grad_norm": 0.706868681575387, "learning_rate": 1.6279582362457165e-06, "loss": 0.5278, "step": 11672 }, { "epoch": 1.49, "grad_norm": 0.618794612795796, "learning_rate": 1.6271966275013529e-06, "loss": 0.4242, "step": 11673 }, { "epoch": 1.49, "grad_norm": 0.6514651043071733, "learning_rate": 1.626435162326453e-06, "loss": 0.4208, "step": 11674 }, { "epoch": 1.49, "grad_norm": 0.6757377179089523, "learning_rate": 1.6256738407534245e-06, "loss": 0.5056, "step": 11675 }, { "epoch": 1.49, "grad_norm": 1.6786881160375586, "learning_rate": 1.624912662814681e-06, "loss": 0.5572, "step": 11676 }, { "epoch": 1.49, "grad_norm": 0.7211068813967096, "learning_rate": 1.6241516285426183e-06, "loss": 0.5078, "step": 11677 }, { "epoch": 1.49, "grad_norm": 0.6589763639653017, "learning_rate": 1.623390737969635e-06, "loss": 0.4542, "step": 11678 }, { "epoch": 1.49, "grad_norm": 0.5719843392074726, "learning_rate": 1.6226299911281163e-06, "loss": 0.4577, "step": 11679 }, { "epoch": 1.49, "grad_norm": 0.7336040203804371, "learning_rate": 1.6218693880504482e-06, "loss": 0.5266, "step": 11680 }, { "epoch": 1.49, "grad_norm": 0.6448540986105957, "learning_rate": 1.6211089287690034e-06, "loss": 0.4804, "step": 11681 }, { "epoch": 1.49, "grad_norm": 0.7955898616096426, "learning_rate": 1.6203486133161555e-06, "loss": 0.5364, "step": 11682 }, { "epoch": 1.49, "grad_norm": 0.7468945140372171, "learning_rate": 1.6195884417242663e-06, "loss": 0.4587, "step": 11683 }, { "epoch": 1.49, "grad_norm": 0.989954219237031, "learning_rate": 1.618828414025696e-06, "loss": 0.5095, "step": 11684 }, { "epoch": 1.49, "grad_norm": 0.7369868046092862, "learning_rate": 1.6180685302527938e-06, "loss": 0.5236, "step": 11685 }, { "epoch": 1.49, "grad_norm": 0.7431057810762721, "learning_rate": 1.6173087904379091e-06, "loss": 0.5428, "step": 11686 }, { "epoch": 1.49, "grad_norm": 0.678718643119146, "learning_rate": 1.6165491946133766e-06, "loss": 0.4309, "step": 11687 }, { "epoch": 1.49, "grad_norm": 0.656017728927584, "learning_rate": 1.6157897428115354e-06, "loss": 0.4556, "step": 11688 }, { "epoch": 1.49, "grad_norm": 0.6436879738085989, "learning_rate": 1.6150304350647095e-06, "loss": 0.4862, "step": 11689 }, { "epoch": 1.49, "grad_norm": 0.7194150380350957, "learning_rate": 1.6142712714052223e-06, "loss": 0.4799, "step": 11690 }, { "epoch": 1.49, "grad_norm": 1.0333355860260767, "learning_rate": 1.6135122518653868e-06, "loss": 0.4995, "step": 11691 }, { "epoch": 1.49, "grad_norm": 0.8593372914682009, "learning_rate": 1.6127533764775143e-06, "loss": 0.5552, "step": 11692 }, { "epoch": 1.49, "grad_norm": 0.7344956407632456, "learning_rate": 1.611994645273905e-06, "loss": 0.5142, "step": 11693 }, { "epoch": 1.49, "grad_norm": 0.618078695798422, "learning_rate": 1.6112360582868586e-06, "loss": 0.4296, "step": 11694 }, { "epoch": 1.49, "grad_norm": 0.616708423446725, "learning_rate": 1.6104776155486623e-06, "loss": 0.4659, "step": 11695 }, { "epoch": 1.49, "grad_norm": 0.6221525712560697, "learning_rate": 1.6097193170916048e-06, "loss": 0.5269, "step": 11696 }, { "epoch": 1.49, "grad_norm": 0.8076474980628993, "learning_rate": 1.6089611629479602e-06, "loss": 0.5579, "step": 11697 }, { "epoch": 1.49, "grad_norm": 0.7357107454652001, "learning_rate": 1.6082031531500047e-06, "loss": 0.4817, "step": 11698 }, { "epoch": 1.49, "grad_norm": 0.5882972688449865, "learning_rate": 1.60744528773e-06, "loss": 0.4384, "step": 11699 }, { "epoch": 1.49, "grad_norm": 0.5990767493900242, "learning_rate": 1.6066875667202093e-06, "loss": 0.4412, "step": 11700 }, { "epoch": 1.49, "grad_norm": 0.7346472202613129, "learning_rate": 1.6059299901528847e-06, "loss": 0.4271, "step": 11701 }, { "epoch": 1.49, "grad_norm": 0.7010105433833499, "learning_rate": 1.6051725580602762e-06, "loss": 0.4774, "step": 11702 }, { "epoch": 1.49, "grad_norm": 0.6691420285863616, "learning_rate": 1.6044152704746224e-06, "loss": 0.4907, "step": 11703 }, { "epoch": 1.49, "grad_norm": 0.7181996896352522, "learning_rate": 1.603658127428161e-06, "loss": 0.523, "step": 11704 }, { "epoch": 1.49, "grad_norm": 0.642430804792435, "learning_rate": 1.6029011289531183e-06, "loss": 0.4818, "step": 11705 }, { "epoch": 1.49, "grad_norm": 0.7907351539196825, "learning_rate": 1.6021442750817208e-06, "loss": 0.5945, "step": 11706 }, { "epoch": 1.49, "grad_norm": 0.7397152098901558, "learning_rate": 1.601387565846182e-06, "loss": 0.5181, "step": 11707 }, { "epoch": 1.49, "grad_norm": 0.6363073864898816, "learning_rate": 1.6006310012787156e-06, "loss": 0.4721, "step": 11708 }, { "epoch": 1.49, "grad_norm": 0.9345310392348628, "learning_rate": 1.599874581411523e-06, "loss": 0.5058, "step": 11709 }, { "epoch": 1.49, "grad_norm": 0.7084655441965263, "learning_rate": 1.5991183062768057e-06, "loss": 0.4809, "step": 11710 }, { "epoch": 1.49, "grad_norm": 0.7529786534217483, "learning_rate": 1.598362175906753e-06, "loss": 0.4894, "step": 11711 }, { "epoch": 1.49, "grad_norm": 0.7560265271118195, "learning_rate": 1.5976061903335526e-06, "loss": 0.5485, "step": 11712 }, { "epoch": 1.49, "grad_norm": 0.7445073342863389, "learning_rate": 1.5968503495893855e-06, "loss": 0.5085, "step": 11713 }, { "epoch": 1.49, "grad_norm": 0.7716263320602863, "learning_rate": 1.5960946537064226e-06, "loss": 0.511, "step": 11714 }, { "epoch": 1.49, "grad_norm": 0.5971113743480645, "learning_rate": 1.5953391027168342e-06, "loss": 0.4205, "step": 11715 }, { "epoch": 1.49, "grad_norm": 0.6234449235150987, "learning_rate": 1.5945836966527794e-06, "loss": 0.5052, "step": 11716 }, { "epoch": 1.49, "grad_norm": 0.7259830708394077, "learning_rate": 1.593828435546414e-06, "loss": 0.4817, "step": 11717 }, { "epoch": 1.49, "grad_norm": 0.6201405993008865, "learning_rate": 1.5930733194298897e-06, "loss": 0.4132, "step": 11718 }, { "epoch": 1.49, "grad_norm": 0.6548015460817552, "learning_rate": 1.5923183483353455e-06, "loss": 0.4445, "step": 11719 }, { "epoch": 1.49, "grad_norm": 0.5760114762894553, "learning_rate": 1.5915635222949212e-06, "loss": 0.4694, "step": 11720 }, { "epoch": 1.49, "grad_norm": 0.7039268430095816, "learning_rate": 1.590808841340744e-06, "loss": 0.5053, "step": 11721 }, { "epoch": 1.49, "grad_norm": 0.5600014365888779, "learning_rate": 1.5900543055049421e-06, "loss": 0.4094, "step": 11722 }, { "epoch": 1.49, "grad_norm": 0.6084876811002675, "learning_rate": 1.5892999148196302e-06, "loss": 0.4377, "step": 11723 }, { "epoch": 1.49, "grad_norm": 0.6953852629916997, "learning_rate": 1.5885456693169221e-06, "loss": 0.4994, "step": 11724 }, { "epoch": 1.49, "grad_norm": 0.6795727701064831, "learning_rate": 1.5877915690289252e-06, "loss": 0.477, "step": 11725 }, { "epoch": 1.49, "grad_norm": 0.5947685028672042, "learning_rate": 1.5870376139877353e-06, "loss": 0.4172, "step": 11726 }, { "epoch": 1.49, "grad_norm": 0.7346432079184089, "learning_rate": 1.5862838042254498e-06, "loss": 0.4619, "step": 11727 }, { "epoch": 1.49, "grad_norm": 0.8121603884955976, "learning_rate": 1.5855301397741518e-06, "loss": 0.5134, "step": 11728 }, { "epoch": 1.49, "grad_norm": 0.658093977326385, "learning_rate": 1.584776620665927e-06, "loss": 0.4814, "step": 11729 }, { "epoch": 1.49, "grad_norm": 0.6034520885958667, "learning_rate": 1.584023246932846e-06, "loss": 0.4365, "step": 11730 }, { "epoch": 1.49, "grad_norm": 0.5632148660641273, "learning_rate": 1.5832700186069815e-06, "loss": 0.3963, "step": 11731 }, { "epoch": 1.49, "grad_norm": 0.6144137899006358, "learning_rate": 1.5825169357203918e-06, "loss": 0.4759, "step": 11732 }, { "epoch": 1.49, "grad_norm": 0.688181547907297, "learning_rate": 1.5817639983051376e-06, "loss": 0.5318, "step": 11733 }, { "epoch": 1.49, "grad_norm": 0.7524833399109788, "learning_rate": 1.5810112063932649e-06, "loss": 0.5195, "step": 11734 }, { "epoch": 1.49, "grad_norm": 0.6532304991786136, "learning_rate": 1.5802585600168214e-06, "loss": 0.455, "step": 11735 }, { "epoch": 1.5, "grad_norm": 0.5896838061345844, "learning_rate": 1.5795060592078415e-06, "loss": 0.47, "step": 11736 }, { "epoch": 1.5, "grad_norm": 0.6407654360264194, "learning_rate": 1.578753703998358e-06, "loss": 0.4353, "step": 11737 }, { "epoch": 1.5, "grad_norm": 0.6875393592874859, "learning_rate": 1.578001494420397e-06, "loss": 0.4363, "step": 11738 }, { "epoch": 1.5, "grad_norm": 0.6161792859934612, "learning_rate": 1.5772494305059788e-06, "loss": 0.4831, "step": 11739 }, { "epoch": 1.5, "grad_norm": 0.6526223812951869, "learning_rate": 1.576497512287113e-06, "loss": 0.5036, "step": 11740 }, { "epoch": 1.5, "grad_norm": 0.6491918024259562, "learning_rate": 1.5757457397958097e-06, "loss": 0.4523, "step": 11741 }, { "epoch": 1.5, "grad_norm": 0.7540643503895571, "learning_rate": 1.574994113064066e-06, "loss": 0.4948, "step": 11742 }, { "epoch": 1.5, "grad_norm": 0.5779330418048424, "learning_rate": 1.5742426321238806e-06, "loss": 0.441, "step": 11743 }, { "epoch": 1.5, "grad_norm": 0.7225246613868435, "learning_rate": 1.573491297007237e-06, "loss": 0.453, "step": 11744 }, { "epoch": 1.5, "grad_norm": 0.7379107104095686, "learning_rate": 1.5727401077461214e-06, "loss": 0.5373, "step": 11745 }, { "epoch": 1.5, "grad_norm": 0.7396805372132097, "learning_rate": 1.5719890643725055e-06, "loss": 0.5861, "step": 11746 }, { "epoch": 1.5, "grad_norm": 0.7411962699226632, "learning_rate": 1.5712381669183625e-06, "loss": 0.527, "step": 11747 }, { "epoch": 1.5, "grad_norm": 0.7519719020075472, "learning_rate": 1.5704874154156518e-06, "loss": 0.5686, "step": 11748 }, { "epoch": 1.5, "grad_norm": 0.968156842929008, "learning_rate": 1.5697368098963334e-06, "loss": 0.5331, "step": 11749 }, { "epoch": 1.5, "grad_norm": 0.7384914180403779, "learning_rate": 1.5689863503923568e-06, "loss": 0.4788, "step": 11750 }, { "epoch": 1.5, "grad_norm": 0.5429395082431272, "learning_rate": 1.5682360369356698e-06, "loss": 0.402, "step": 11751 }, { "epoch": 1.5, "grad_norm": 0.5750268987020277, "learning_rate": 1.5674858695582063e-06, "loss": 0.4096, "step": 11752 }, { "epoch": 1.5, "grad_norm": 0.654286318930066, "learning_rate": 1.5667358482919022e-06, "loss": 0.438, "step": 11753 }, { "epoch": 1.5, "grad_norm": 0.6134140235453149, "learning_rate": 1.56598597316868e-06, "loss": 0.4763, "step": 11754 }, { "epoch": 1.5, "grad_norm": 0.7039122258646313, "learning_rate": 1.5652362442204632e-06, "loss": 0.4914, "step": 11755 }, { "epoch": 1.5, "grad_norm": 0.6116849157370886, "learning_rate": 1.564486661479162e-06, "loss": 0.4096, "step": 11756 }, { "epoch": 1.5, "grad_norm": 0.7165708027256114, "learning_rate": 1.5637372249766869e-06, "loss": 0.4889, "step": 11757 }, { "epoch": 1.5, "grad_norm": 0.6299564560444582, "learning_rate": 1.5629879347449362e-06, "loss": 0.4847, "step": 11758 }, { "epoch": 1.5, "grad_norm": 0.5888890148812191, "learning_rate": 1.562238790815807e-06, "loss": 0.4118, "step": 11759 }, { "epoch": 1.5, "grad_norm": 0.7052670554341612, "learning_rate": 1.5614897932211854e-06, "loss": 0.5085, "step": 11760 }, { "epoch": 1.5, "grad_norm": 0.8093611104067335, "learning_rate": 1.5607409419929576e-06, "loss": 0.5263, "step": 11761 }, { "epoch": 1.5, "grad_norm": 0.6389577030413439, "learning_rate": 1.559992237162994e-06, "loss": 0.4857, "step": 11762 }, { "epoch": 1.5, "grad_norm": 0.5891752639213359, "learning_rate": 1.559243678763171e-06, "loss": 0.456, "step": 11763 }, { "epoch": 1.5, "grad_norm": 0.6419411020546483, "learning_rate": 1.5584952668253484e-06, "loss": 0.4509, "step": 11764 }, { "epoch": 1.5, "grad_norm": 0.6148366827308623, "learning_rate": 1.5577470013813862e-06, "loss": 0.4905, "step": 11765 }, { "epoch": 1.5, "grad_norm": 0.7111504954333165, "learning_rate": 1.5569988824631327e-06, "loss": 0.5505, "step": 11766 }, { "epoch": 1.5, "grad_norm": 0.7666176035982654, "learning_rate": 1.556250910102436e-06, "loss": 0.5043, "step": 11767 }, { "epoch": 1.5, "grad_norm": 0.5837982535188084, "learning_rate": 1.555503084331132e-06, "loss": 0.4352, "step": 11768 }, { "epoch": 1.5, "grad_norm": 0.6010695453524361, "learning_rate": 1.5547554051810565e-06, "loss": 0.4371, "step": 11769 }, { "epoch": 1.5, "grad_norm": 0.6264840395814519, "learning_rate": 1.5540078726840319e-06, "loss": 0.4703, "step": 11770 }, { "epoch": 1.5, "grad_norm": 0.68022669008752, "learning_rate": 1.553260486871882e-06, "loss": 0.514, "step": 11771 }, { "epoch": 1.5, "grad_norm": 0.7956379103897336, "learning_rate": 1.5525132477764176e-06, "loss": 0.5121, "step": 11772 }, { "epoch": 1.5, "grad_norm": 0.7154696553375018, "learning_rate": 1.5517661554294494e-06, "loss": 0.4875, "step": 11773 }, { "epoch": 1.5, "grad_norm": 0.6287273317750448, "learning_rate": 1.5510192098627752e-06, "loss": 0.4581, "step": 11774 }, { "epoch": 1.5, "grad_norm": 0.6814051416071311, "learning_rate": 1.550272411108192e-06, "loss": 0.478, "step": 11775 }, { "epoch": 1.5, "grad_norm": 0.6542467001588551, "learning_rate": 1.5495257591974894e-06, "loss": 0.4654, "step": 11776 }, { "epoch": 1.5, "grad_norm": 0.653667677085657, "learning_rate": 1.548779254162448e-06, "loss": 0.5436, "step": 11777 }, { "epoch": 1.5, "grad_norm": 0.802816849316158, "learning_rate": 1.5480328960348456e-06, "loss": 0.4777, "step": 11778 }, { "epoch": 1.5, "grad_norm": 0.5988588495379926, "learning_rate": 1.547286684846453e-06, "loss": 0.4143, "step": 11779 }, { "epoch": 1.5, "grad_norm": 0.6680500243335126, "learning_rate": 1.5465406206290318e-06, "loss": 0.5123, "step": 11780 }, { "epoch": 1.5, "grad_norm": 0.8222643093206636, "learning_rate": 1.5457947034143417e-06, "loss": 0.5485, "step": 11781 }, { "epoch": 1.5, "grad_norm": 0.6696696806370672, "learning_rate": 1.5450489332341317e-06, "loss": 0.4826, "step": 11782 }, { "epoch": 1.5, "grad_norm": 0.771063794677532, "learning_rate": 1.5443033101201498e-06, "loss": 0.5505, "step": 11783 }, { "epoch": 1.5, "grad_norm": 0.6418626006642414, "learning_rate": 1.5435578341041313e-06, "loss": 0.4411, "step": 11784 }, { "epoch": 1.5, "grad_norm": 0.5483683083025235, "learning_rate": 1.5428125052178128e-06, "loss": 0.4549, "step": 11785 }, { "epoch": 1.5, "grad_norm": 0.7608689896695447, "learning_rate": 1.5420673234929162e-06, "loss": 0.523, "step": 11786 }, { "epoch": 1.5, "grad_norm": 0.7668921287249512, "learning_rate": 1.541322288961164e-06, "loss": 0.5205, "step": 11787 }, { "epoch": 1.5, "grad_norm": 0.8093078290301612, "learning_rate": 1.5405774016542707e-06, "loss": 0.5532, "step": 11788 }, { "epoch": 1.5, "grad_norm": 0.8690668151779515, "learning_rate": 1.5398326616039415e-06, "loss": 0.5387, "step": 11789 }, { "epoch": 1.5, "grad_norm": 0.6192319007616564, "learning_rate": 1.5390880688418808e-06, "loss": 0.47, "step": 11790 }, { "epoch": 1.5, "grad_norm": 0.6493168836358363, "learning_rate": 1.538343623399779e-06, "loss": 0.4736, "step": 11791 }, { "epoch": 1.5, "grad_norm": 0.7391252325169428, "learning_rate": 1.5375993253093298e-06, "loss": 0.4758, "step": 11792 }, { "epoch": 1.5, "grad_norm": 0.6755645686739995, "learning_rate": 1.5368551746022109e-06, "loss": 0.486, "step": 11793 }, { "epoch": 1.5, "grad_norm": 0.7293311495375693, "learning_rate": 1.5361111713101017e-06, "loss": 0.5299, "step": 11794 }, { "epoch": 1.5, "grad_norm": 0.7938046028945703, "learning_rate": 1.5353673154646703e-06, "loss": 0.4883, "step": 11795 }, { "epoch": 1.5, "grad_norm": 0.7300984710314583, "learning_rate": 1.5346236070975823e-06, "loss": 0.5228, "step": 11796 }, { "epoch": 1.5, "grad_norm": 0.8588919596365745, "learning_rate": 1.5338800462404918e-06, "loss": 0.5292, "step": 11797 }, { "epoch": 1.5, "grad_norm": 0.7817866249813722, "learning_rate": 1.533136632925053e-06, "loss": 0.5224, "step": 11798 }, { "epoch": 1.5, "grad_norm": 0.6312476615578303, "learning_rate": 1.532393367182906e-06, "loss": 0.4807, "step": 11799 }, { "epoch": 1.5, "grad_norm": 0.6032819000982859, "learning_rate": 1.5316502490456963e-06, "loss": 0.4443, "step": 11800 }, { "epoch": 1.5, "grad_norm": 0.7333570353653186, "learning_rate": 1.5309072785450502e-06, "loss": 0.5031, "step": 11801 }, { "epoch": 1.5, "grad_norm": 0.65520263007275, "learning_rate": 1.5301644557125973e-06, "loss": 0.4434, "step": 11802 }, { "epoch": 1.5, "grad_norm": 0.6385059594453464, "learning_rate": 1.5294217805799539e-06, "loss": 0.4637, "step": 11803 }, { "epoch": 1.5, "grad_norm": 0.7016805321237948, "learning_rate": 1.5286792531787365e-06, "loss": 0.4837, "step": 11804 }, { "epoch": 1.5, "grad_norm": 0.8813445668346723, "learning_rate": 1.5279368735405487e-06, "loss": 0.5593, "step": 11805 }, { "epoch": 1.5, "grad_norm": 0.6998779931818744, "learning_rate": 1.5271946416969945e-06, "loss": 0.4976, "step": 11806 }, { "epoch": 1.5, "grad_norm": 0.75638765995227, "learning_rate": 1.5264525576796663e-06, "loss": 0.5477, "step": 11807 }, { "epoch": 1.5, "grad_norm": 0.7014983236205438, "learning_rate": 1.5257106215201533e-06, "loss": 0.4773, "step": 11808 }, { "epoch": 1.5, "grad_norm": 0.5784815778331148, "learning_rate": 1.5249688332500362e-06, "loss": 0.4077, "step": 11809 }, { "epoch": 1.5, "grad_norm": 0.6860376601882198, "learning_rate": 1.5242271929008923e-06, "loss": 0.5176, "step": 11810 }, { "epoch": 1.5, "grad_norm": 0.708522576587283, "learning_rate": 1.523485700504287e-06, "loss": 0.4995, "step": 11811 }, { "epoch": 1.5, "grad_norm": 0.8573932136254214, "learning_rate": 1.5227443560917894e-06, "loss": 0.5143, "step": 11812 }, { "epoch": 1.5, "grad_norm": 0.8141553424477549, "learning_rate": 1.5220031596949508e-06, "loss": 0.47, "step": 11813 }, { "epoch": 1.51, "grad_norm": 0.799925582347533, "learning_rate": 1.5212621113453252e-06, "loss": 0.5356, "step": 11814 }, { "epoch": 1.51, "grad_norm": 0.735865505739735, "learning_rate": 1.5205212110744538e-06, "loss": 0.5113, "step": 11815 }, { "epoch": 1.51, "grad_norm": 0.5881047136717945, "learning_rate": 1.519780458913877e-06, "loss": 0.4616, "step": 11816 }, { "epoch": 1.51, "grad_norm": 0.6602627792109644, "learning_rate": 1.519039854895123e-06, "loss": 0.5027, "step": 11817 }, { "epoch": 1.51, "grad_norm": 0.8064955582453905, "learning_rate": 1.5182993990497208e-06, "loss": 0.4981, "step": 11818 }, { "epoch": 1.51, "grad_norm": 0.6263308430897092, "learning_rate": 1.5175590914091853e-06, "loss": 0.5091, "step": 11819 }, { "epoch": 1.51, "grad_norm": 0.7712133292642409, "learning_rate": 1.516818932005033e-06, "loss": 0.5092, "step": 11820 }, { "epoch": 1.51, "grad_norm": 0.589759177810315, "learning_rate": 1.5160789208687665e-06, "loss": 0.4382, "step": 11821 }, { "epoch": 1.51, "grad_norm": 0.5582565081809934, "learning_rate": 1.5153390580318888e-06, "loss": 0.438, "step": 11822 }, { "epoch": 1.51, "grad_norm": 0.6389886936717366, "learning_rate": 1.5145993435258905e-06, "loss": 0.4795, "step": 11823 }, { "epoch": 1.51, "grad_norm": 0.716650432743136, "learning_rate": 1.5138597773822606e-06, "loss": 0.464, "step": 11824 }, { "epoch": 1.51, "grad_norm": 0.6478239783158753, "learning_rate": 1.5131203596324795e-06, "loss": 0.5096, "step": 11825 }, { "epoch": 1.51, "grad_norm": 0.6454641410197964, "learning_rate": 1.5123810903080239e-06, "loss": 0.4651, "step": 11826 }, { "epoch": 1.51, "grad_norm": 0.6202316761037979, "learning_rate": 1.5116419694403588e-06, "loss": 0.4546, "step": 11827 }, { "epoch": 1.51, "grad_norm": 0.6202174516241951, "learning_rate": 1.5109029970609495e-06, "loss": 0.4357, "step": 11828 }, { "epoch": 1.51, "grad_norm": 0.6908969314066419, "learning_rate": 1.5101641732012484e-06, "loss": 0.5059, "step": 11829 }, { "epoch": 1.51, "grad_norm": 0.7185467599864933, "learning_rate": 1.509425497892708e-06, "loss": 0.513, "step": 11830 }, { "epoch": 1.51, "grad_norm": 0.7064557693082774, "learning_rate": 1.5086869711667684e-06, "loss": 0.4665, "step": 11831 }, { "epoch": 1.51, "grad_norm": 0.578798687162687, "learning_rate": 1.50794859305487e-06, "loss": 0.4932, "step": 11832 }, { "epoch": 1.51, "grad_norm": 0.6905663030653478, "learning_rate": 1.5072103635884384e-06, "loss": 0.5603, "step": 11833 }, { "epoch": 1.51, "grad_norm": 0.7510664241129372, "learning_rate": 1.5064722827989026e-06, "loss": 0.4585, "step": 11834 }, { "epoch": 1.51, "grad_norm": 0.6273943909358254, "learning_rate": 1.505734350717676e-06, "loss": 0.4298, "step": 11835 }, { "epoch": 1.51, "grad_norm": 0.6419214079985587, "learning_rate": 1.504996567376172e-06, "loss": 0.4343, "step": 11836 }, { "epoch": 1.51, "grad_norm": 0.678229937919406, "learning_rate": 1.5042589328057977e-06, "loss": 0.4462, "step": 11837 }, { "epoch": 1.51, "grad_norm": 0.6283421918971349, "learning_rate": 1.5035214470379478e-06, "loss": 0.455, "step": 11838 }, { "epoch": 1.51, "grad_norm": 0.6304615540000009, "learning_rate": 1.5027841101040186e-06, "loss": 0.4386, "step": 11839 }, { "epoch": 1.51, "grad_norm": 0.703904198555036, "learning_rate": 1.5020469220353928e-06, "loss": 0.4178, "step": 11840 }, { "epoch": 1.51, "grad_norm": 0.6089485199679933, "learning_rate": 1.5013098828634514e-06, "loss": 0.4245, "step": 11841 }, { "epoch": 1.51, "grad_norm": 0.7898261139824619, "learning_rate": 1.50057299261957e-06, "loss": 0.5052, "step": 11842 }, { "epoch": 1.51, "grad_norm": 0.7025556079504152, "learning_rate": 1.4998362513351118e-06, "loss": 0.5037, "step": 11843 }, { "epoch": 1.51, "grad_norm": 0.5482683859555755, "learning_rate": 1.4990996590414408e-06, "loss": 0.438, "step": 11844 }, { "epoch": 1.51, "grad_norm": 0.5988178399422383, "learning_rate": 1.4983632157699091e-06, "loss": 0.4193, "step": 11845 }, { "epoch": 1.51, "grad_norm": 0.6101592647269215, "learning_rate": 1.4976269215518667e-06, "loss": 0.4284, "step": 11846 }, { "epoch": 1.51, "grad_norm": 0.6584350770773381, "learning_rate": 1.4968907764186535e-06, "loss": 0.4462, "step": 11847 }, { "epoch": 1.51, "grad_norm": 0.644409796959749, "learning_rate": 1.4961547804016046e-06, "loss": 0.4285, "step": 11848 }, { "epoch": 1.51, "grad_norm": 0.7596332590127007, "learning_rate": 1.4954189335320524e-06, "loss": 0.5522, "step": 11849 }, { "epoch": 1.51, "grad_norm": 0.7126415012322277, "learning_rate": 1.4946832358413154e-06, "loss": 0.4777, "step": 11850 }, { "epoch": 1.51, "grad_norm": 0.8145319375764453, "learning_rate": 1.4939476873607129e-06, "loss": 0.5306, "step": 11851 }, { "epoch": 1.51, "grad_norm": 0.5730804762496607, "learning_rate": 1.4932122881215522e-06, "loss": 0.4582, "step": 11852 }, { "epoch": 1.51, "grad_norm": 0.7628296180292785, "learning_rate": 1.49247703815514e-06, "loss": 0.5631, "step": 11853 }, { "epoch": 1.51, "grad_norm": 0.7769824708820705, "learning_rate": 1.49174193749277e-06, "loss": 0.5013, "step": 11854 }, { "epoch": 1.51, "grad_norm": 0.6436061963935515, "learning_rate": 1.4910069861657367e-06, "loss": 0.4509, "step": 11855 }, { "epoch": 1.51, "grad_norm": 0.6284176954190089, "learning_rate": 1.4902721842053215e-06, "loss": 0.4675, "step": 11856 }, { "epoch": 1.51, "grad_norm": 0.857460364630511, "learning_rate": 1.489537531642805e-06, "loss": 0.5037, "step": 11857 }, { "epoch": 1.51, "grad_norm": 1.4153407418352368, "learning_rate": 1.4888030285094569e-06, "loss": 0.5133, "step": 11858 }, { "epoch": 1.51, "grad_norm": 0.7606255532426416, "learning_rate": 1.4880686748365453e-06, "loss": 0.5251, "step": 11859 }, { "epoch": 1.51, "grad_norm": 0.7559046403360551, "learning_rate": 1.4873344706553255e-06, "loss": 0.4628, "step": 11860 }, { "epoch": 1.51, "grad_norm": 0.7370367427270319, "learning_rate": 1.4866004159970527e-06, "loss": 0.5052, "step": 11861 }, { "epoch": 1.51, "grad_norm": 0.8421025089555793, "learning_rate": 1.4858665108929732e-06, "loss": 0.5515, "step": 11862 }, { "epoch": 1.51, "grad_norm": 0.7308373939186672, "learning_rate": 1.4851327553743283e-06, "loss": 0.528, "step": 11863 }, { "epoch": 1.51, "grad_norm": 0.7749088901483504, "learning_rate": 1.484399149472348e-06, "loss": 0.4869, "step": 11864 }, { "epoch": 1.51, "grad_norm": 0.5833079333836493, "learning_rate": 1.4836656932182635e-06, "loss": 0.4123, "step": 11865 }, { "epoch": 1.51, "grad_norm": 0.7145071771547642, "learning_rate": 1.4829323866432927e-06, "loss": 0.5104, "step": 11866 }, { "epoch": 1.51, "grad_norm": 0.7416386903058149, "learning_rate": 1.4821992297786524e-06, "loss": 0.5, "step": 11867 }, { "epoch": 1.51, "grad_norm": 0.717791106770553, "learning_rate": 1.4814662226555482e-06, "loss": 0.4784, "step": 11868 }, { "epoch": 1.51, "grad_norm": 0.7302412626467278, "learning_rate": 1.4807333653051848e-06, "loss": 0.5481, "step": 11869 }, { "epoch": 1.51, "grad_norm": 0.7592125182075823, "learning_rate": 1.4800006577587545e-06, "loss": 0.5285, "step": 11870 }, { "epoch": 1.51, "grad_norm": 0.6763326314552282, "learning_rate": 1.4792681000474496e-06, "loss": 0.4864, "step": 11871 }, { "epoch": 1.51, "grad_norm": 0.6598942039187701, "learning_rate": 1.4785356922024491e-06, "loss": 0.4778, "step": 11872 }, { "epoch": 1.51, "grad_norm": 0.575117547414073, "learning_rate": 1.4778034342549313e-06, "loss": 0.4069, "step": 11873 }, { "epoch": 1.51, "grad_norm": 0.643936184193435, "learning_rate": 1.4770713262360664e-06, "loss": 0.47, "step": 11874 }, { "epoch": 1.51, "grad_norm": 0.5687847044527354, "learning_rate": 1.476339368177019e-06, "loss": 0.4304, "step": 11875 }, { "epoch": 1.51, "grad_norm": 0.6134730889666818, "learning_rate": 1.4756075601089426e-06, "loss": 0.4582, "step": 11876 }, { "epoch": 1.51, "grad_norm": 0.574258200621919, "learning_rate": 1.4748759020629916e-06, "loss": 0.4603, "step": 11877 }, { "epoch": 1.51, "grad_norm": 0.7289514713332602, "learning_rate": 1.4741443940703076e-06, "loss": 0.4448, "step": 11878 }, { "epoch": 1.51, "grad_norm": 0.6754957264099952, "learning_rate": 1.4734130361620308e-06, "loss": 0.4588, "step": 11879 }, { "epoch": 1.51, "grad_norm": 0.5779789544473176, "learning_rate": 1.4726818283692907e-06, "loss": 0.4729, "step": 11880 }, { "epoch": 1.51, "grad_norm": 0.7579048188425673, "learning_rate": 1.4719507707232145e-06, "loss": 0.5336, "step": 11881 }, { "epoch": 1.51, "grad_norm": 0.7298442253391793, "learning_rate": 1.471219863254919e-06, "loss": 0.5254, "step": 11882 }, { "epoch": 1.51, "grad_norm": 0.6527989485826166, "learning_rate": 1.4704891059955184e-06, "loss": 0.4636, "step": 11883 }, { "epoch": 1.51, "grad_norm": 0.6276337234324153, "learning_rate": 1.469758498976117e-06, "loss": 0.4765, "step": 11884 }, { "epoch": 1.51, "grad_norm": 0.7632291157898848, "learning_rate": 1.4690280422278164e-06, "loss": 0.5636, "step": 11885 }, { "epoch": 1.51, "grad_norm": 0.7063143936721762, "learning_rate": 1.4682977357817057e-06, "loss": 0.5329, "step": 11886 }, { "epoch": 1.51, "grad_norm": 0.6768998483318774, "learning_rate": 1.4675675796688777e-06, "loss": 0.4854, "step": 11887 }, { "epoch": 1.51, "grad_norm": 0.6882104109259862, "learning_rate": 1.4668375739204083e-06, "loss": 0.4776, "step": 11888 }, { "epoch": 1.51, "grad_norm": 0.7901680166928836, "learning_rate": 1.4661077185673745e-06, "loss": 0.578, "step": 11889 }, { "epoch": 1.51, "grad_norm": 0.8936742457878368, "learning_rate": 1.4653780136408407e-06, "loss": 0.5461, "step": 11890 }, { "epoch": 1.51, "grad_norm": 0.7593327976144598, "learning_rate": 1.4646484591718717e-06, "loss": 0.4654, "step": 11891 }, { "epoch": 1.52, "grad_norm": 0.6599733636141957, "learning_rate": 1.4639190551915189e-06, "loss": 0.4189, "step": 11892 }, { "epoch": 1.52, "grad_norm": 0.7696281045056309, "learning_rate": 1.4631898017308338e-06, "loss": 0.4771, "step": 11893 }, { "epoch": 1.52, "grad_norm": 0.8125166737913756, "learning_rate": 1.462460698820855e-06, "loss": 0.5116, "step": 11894 }, { "epoch": 1.52, "grad_norm": 0.760748975049582, "learning_rate": 1.4617317464926217e-06, "loss": 0.5034, "step": 11895 }, { "epoch": 1.52, "grad_norm": 0.5917640577108612, "learning_rate": 1.4610029447771595e-06, "loss": 0.4541, "step": 11896 }, { "epoch": 1.52, "grad_norm": 0.6254715694753664, "learning_rate": 1.4602742937054942e-06, "loss": 0.4268, "step": 11897 }, { "epoch": 1.52, "grad_norm": 0.6612754649362964, "learning_rate": 1.45954579330864e-06, "loss": 0.5101, "step": 11898 }, { "epoch": 1.52, "grad_norm": 0.7639911655799347, "learning_rate": 1.4588174436176073e-06, "loss": 0.5033, "step": 11899 }, { "epoch": 1.52, "grad_norm": 0.6107291144120331, "learning_rate": 1.4580892446634015e-06, "loss": 0.4513, "step": 11900 }, { "epoch": 1.52, "grad_norm": 0.7302142075022269, "learning_rate": 1.4573611964770168e-06, "loss": 0.4629, "step": 11901 }, { "epoch": 1.52, "grad_norm": 0.6985277886307565, "learning_rate": 1.4566332990894456e-06, "loss": 0.4996, "step": 11902 }, { "epoch": 1.52, "grad_norm": 0.7858628190559802, "learning_rate": 1.455905552531673e-06, "loss": 0.53, "step": 11903 }, { "epoch": 1.52, "grad_norm": 0.6550350598902999, "learning_rate": 1.455177956834674e-06, "loss": 0.484, "step": 11904 }, { "epoch": 1.52, "grad_norm": 0.7473365611735782, "learning_rate": 1.4544505120294239e-06, "loss": 0.5013, "step": 11905 }, { "epoch": 1.52, "grad_norm": 0.7978067914947106, "learning_rate": 1.4537232181468835e-06, "loss": 0.5719, "step": 11906 }, { "epoch": 1.52, "grad_norm": 0.6288148890051874, "learning_rate": 1.4529960752180156e-06, "loss": 0.4793, "step": 11907 }, { "epoch": 1.52, "grad_norm": 0.6194600075601276, "learning_rate": 1.4522690832737679e-06, "loss": 0.4572, "step": 11908 }, { "epoch": 1.52, "grad_norm": 0.7936754033642945, "learning_rate": 1.4515422423450902e-06, "loss": 0.5541, "step": 11909 }, { "epoch": 1.52, "grad_norm": 0.6286049158787025, "learning_rate": 1.4508155524629186e-06, "loss": 0.4746, "step": 11910 }, { "epoch": 1.52, "grad_norm": 0.7241940144325015, "learning_rate": 1.4500890136581875e-06, "loss": 0.516, "step": 11911 }, { "epoch": 1.52, "grad_norm": 0.5373397971462176, "learning_rate": 1.4493626259618242e-06, "loss": 0.3974, "step": 11912 }, { "epoch": 1.52, "grad_norm": 0.6185760583103315, "learning_rate": 1.4486363894047468e-06, "loss": 0.4874, "step": 11913 }, { "epoch": 1.52, "grad_norm": 0.7521964921008295, "learning_rate": 1.447910304017871e-06, "loss": 0.4835, "step": 11914 }, { "epoch": 1.52, "grad_norm": 0.6329308429095662, "learning_rate": 1.4471843698321009e-06, "loss": 0.4769, "step": 11915 }, { "epoch": 1.52, "grad_norm": 0.7325882968022589, "learning_rate": 1.446458586878341e-06, "loss": 0.4859, "step": 11916 }, { "epoch": 1.52, "grad_norm": 0.6366233076546924, "learning_rate": 1.4457329551874816e-06, "loss": 0.4526, "step": 11917 }, { "epoch": 1.52, "grad_norm": 0.6325091054991279, "learning_rate": 1.4450074747904142e-06, "loss": 0.4685, "step": 11918 }, { "epoch": 1.52, "grad_norm": 0.554474044348401, "learning_rate": 1.4442821457180167e-06, "loss": 0.4191, "step": 11919 }, { "epoch": 1.52, "grad_norm": 0.5816684748831272, "learning_rate": 1.443556968001168e-06, "loss": 0.4071, "step": 11920 }, { "epoch": 1.52, "grad_norm": 0.8849647868166594, "learning_rate": 1.4428319416707327e-06, "loss": 0.4318, "step": 11921 }, { "epoch": 1.52, "grad_norm": 0.748242272080491, "learning_rate": 1.4421070667575754e-06, "loss": 0.5431, "step": 11922 }, { "epoch": 1.52, "grad_norm": 0.9153034293356291, "learning_rate": 1.4413823432925483e-06, "loss": 0.5074, "step": 11923 }, { "epoch": 1.52, "grad_norm": 0.622396773485618, "learning_rate": 1.4406577713065061e-06, "loss": 0.4936, "step": 11924 }, { "epoch": 1.52, "grad_norm": 0.7407669943325329, "learning_rate": 1.4399333508302871e-06, "loss": 0.5255, "step": 11925 }, { "epoch": 1.52, "grad_norm": 0.7967093278333663, "learning_rate": 1.4392090818947308e-06, "loss": 0.5271, "step": 11926 }, { "epoch": 1.52, "grad_norm": 0.6152531122051506, "learning_rate": 1.4384849645306631e-06, "loss": 0.4728, "step": 11927 }, { "epoch": 1.52, "grad_norm": 0.8028136064244589, "learning_rate": 1.4377609987689117e-06, "loss": 0.551, "step": 11928 }, { "epoch": 1.52, "grad_norm": 0.6163416288227633, "learning_rate": 1.437037184640289e-06, "loss": 0.4352, "step": 11929 }, { "epoch": 1.52, "grad_norm": 0.9569357815659029, "learning_rate": 1.4363135221756097e-06, "loss": 0.4821, "step": 11930 }, { "epoch": 1.52, "grad_norm": 0.649481211374458, "learning_rate": 1.4355900114056743e-06, "loss": 0.4788, "step": 11931 }, { "epoch": 1.52, "grad_norm": 0.673899160816937, "learning_rate": 1.4348666523612837e-06, "loss": 0.4911, "step": 11932 }, { "epoch": 1.52, "grad_norm": 0.8855748910508573, "learning_rate": 1.434143445073225e-06, "loss": 0.5546, "step": 11933 }, { "epoch": 1.52, "grad_norm": 0.7517144380153586, "learning_rate": 1.433420389572287e-06, "loss": 0.491, "step": 11934 }, { "epoch": 1.52, "grad_norm": 0.6736783602357297, "learning_rate": 1.4326974858892429e-06, "loss": 0.481, "step": 11935 }, { "epoch": 1.52, "grad_norm": 0.7518184370772593, "learning_rate": 1.43197473405487e-06, "loss": 0.4954, "step": 11936 }, { "epoch": 1.52, "grad_norm": 0.684843127693918, "learning_rate": 1.4312521340999297e-06, "loss": 0.4079, "step": 11937 }, { "epoch": 1.52, "grad_norm": 0.7028455966540011, "learning_rate": 1.4305296860551831e-06, "loss": 0.4909, "step": 11938 }, { "epoch": 1.52, "grad_norm": 0.7536326138616248, "learning_rate": 1.4298073899513803e-06, "loss": 0.4917, "step": 11939 }, { "epoch": 1.52, "grad_norm": 0.5923788123906366, "learning_rate": 1.4290852458192695e-06, "loss": 0.4344, "step": 11940 }, { "epoch": 1.52, "grad_norm": 0.5748647703219977, "learning_rate": 1.428363253689587e-06, "loss": 0.4234, "step": 11941 }, { "epoch": 1.52, "grad_norm": 0.5871307327750466, "learning_rate": 1.4276414135930695e-06, "loss": 0.4203, "step": 11942 }, { "epoch": 1.52, "grad_norm": 0.6646586427845788, "learning_rate": 1.4269197255604395e-06, "loss": 0.4819, "step": 11943 }, { "epoch": 1.52, "grad_norm": 0.7639983366964239, "learning_rate": 1.4261981896224209e-06, "loss": 0.4926, "step": 11944 }, { "epoch": 1.52, "grad_norm": 0.5927902522432147, "learning_rate": 1.4254768058097228e-06, "loss": 0.4491, "step": 11945 }, { "epoch": 1.52, "grad_norm": 0.7494902058550442, "learning_rate": 1.4247555741530566e-06, "loss": 0.4972, "step": 11946 }, { "epoch": 1.52, "grad_norm": 0.824565801744356, "learning_rate": 1.4240344946831192e-06, "loss": 0.4909, "step": 11947 }, { "epoch": 1.52, "grad_norm": 0.6245604221413591, "learning_rate": 1.423313567430606e-06, "loss": 0.4654, "step": 11948 }, { "epoch": 1.52, "grad_norm": 0.5799736849700223, "learning_rate": 1.422592792426205e-06, "loss": 0.4471, "step": 11949 }, { "epoch": 1.52, "grad_norm": 0.7624688173524956, "learning_rate": 1.4218721697005984e-06, "loss": 0.4586, "step": 11950 }, { "epoch": 1.52, "grad_norm": 0.6614371141124121, "learning_rate": 1.421151699284458e-06, "loss": 0.5309, "step": 11951 }, { "epoch": 1.52, "grad_norm": 0.746829092014637, "learning_rate": 1.4204313812084552e-06, "loss": 0.4954, "step": 11952 }, { "epoch": 1.52, "grad_norm": 0.6761957546420123, "learning_rate": 1.4197112155032484e-06, "loss": 0.504, "step": 11953 }, { "epoch": 1.52, "grad_norm": 0.6896137298686484, "learning_rate": 1.418991202199495e-06, "loss": 0.4449, "step": 11954 }, { "epoch": 1.52, "grad_norm": 0.5946682175975541, "learning_rate": 1.4182713413278421e-06, "loss": 0.4231, "step": 11955 }, { "epoch": 1.52, "grad_norm": 0.6378199333418438, "learning_rate": 1.4175516329189336e-06, "loss": 0.5385, "step": 11956 }, { "epoch": 1.52, "grad_norm": 0.8033376375797316, "learning_rate": 1.4168320770034027e-06, "loss": 0.5389, "step": 11957 }, { "epoch": 1.52, "grad_norm": 0.8003877712300571, "learning_rate": 1.4161126736118818e-06, "loss": 0.5532, "step": 11958 }, { "epoch": 1.52, "grad_norm": 0.7013408170494367, "learning_rate": 1.4153934227749906e-06, "loss": 0.5007, "step": 11959 }, { "epoch": 1.52, "grad_norm": 0.5777901475393556, "learning_rate": 1.4146743245233463e-06, "loss": 0.445, "step": 11960 }, { "epoch": 1.52, "grad_norm": 0.60618280587829, "learning_rate": 1.413955378887561e-06, "loss": 0.4237, "step": 11961 }, { "epoch": 1.52, "grad_norm": 0.6977927192262073, "learning_rate": 1.4132365858982339e-06, "loss": 0.4372, "step": 11962 }, { "epoch": 1.52, "grad_norm": 0.5529689275852305, "learning_rate": 1.4125179455859634e-06, "loss": 0.4435, "step": 11963 }, { "epoch": 1.52, "grad_norm": 0.6465283117409542, "learning_rate": 1.4117994579813421e-06, "loss": 0.4524, "step": 11964 }, { "epoch": 1.52, "grad_norm": 0.6324985273586289, "learning_rate": 1.4110811231149495e-06, "loss": 0.4954, "step": 11965 }, { "epoch": 1.52, "grad_norm": 0.6556563841419782, "learning_rate": 1.410362941017367e-06, "loss": 0.4788, "step": 11966 }, { "epoch": 1.52, "grad_norm": 0.6874829131602086, "learning_rate": 1.4096449117191619e-06, "loss": 0.3874, "step": 11967 }, { "epoch": 1.52, "grad_norm": 0.659648034006389, "learning_rate": 1.408927035250901e-06, "loss": 0.4688, "step": 11968 }, { "epoch": 1.52, "grad_norm": 0.760973846093577, "learning_rate": 1.408209311643139e-06, "loss": 0.4835, "step": 11969 }, { "epoch": 1.52, "grad_norm": 0.7310589069948038, "learning_rate": 1.4074917409264304e-06, "loss": 0.4855, "step": 11970 }, { "epoch": 1.53, "grad_norm": 0.683464203817682, "learning_rate": 1.4067743231313174e-06, "loss": 0.5, "step": 11971 }, { "epoch": 1.53, "grad_norm": 0.596613413010919, "learning_rate": 1.4060570582883388e-06, "loss": 0.4422, "step": 11972 }, { "epoch": 1.53, "grad_norm": 0.6278333890724107, "learning_rate": 1.4053399464280282e-06, "loss": 0.4571, "step": 11973 }, { "epoch": 1.53, "grad_norm": 0.7844696714547017, "learning_rate": 1.4046229875809076e-06, "loss": 0.5401, "step": 11974 }, { "epoch": 1.53, "grad_norm": 0.6894544135343352, "learning_rate": 1.403906181777499e-06, "loss": 0.5196, "step": 11975 }, { "epoch": 1.53, "grad_norm": 0.715277767498412, "learning_rate": 1.4031895290483112e-06, "loss": 0.5197, "step": 11976 }, { "epoch": 1.53, "grad_norm": 0.8171187872423455, "learning_rate": 1.4024730294238525e-06, "loss": 0.4879, "step": 11977 }, { "epoch": 1.53, "grad_norm": 0.6373667151281672, "learning_rate": 1.4017566829346196e-06, "loss": 0.4866, "step": 11978 }, { "epoch": 1.53, "grad_norm": 0.5903546033363626, "learning_rate": 1.401040489611108e-06, "loss": 0.4915, "step": 11979 }, { "epoch": 1.53, "grad_norm": 0.6854696279920867, "learning_rate": 1.4003244494838003e-06, "loss": 0.5144, "step": 11980 }, { "epoch": 1.53, "grad_norm": 0.5707116265821914, "learning_rate": 1.3996085625831801e-06, "loss": 0.4466, "step": 11981 }, { "epoch": 1.53, "grad_norm": 0.559962507230934, "learning_rate": 1.3988928289397158e-06, "loss": 0.4623, "step": 11982 }, { "epoch": 1.53, "grad_norm": 0.6003526781341626, "learning_rate": 1.3981772485838785e-06, "loss": 0.4422, "step": 11983 }, { "epoch": 1.53, "grad_norm": 0.6364568543294888, "learning_rate": 1.397461821546124e-06, "loss": 0.4571, "step": 11984 }, { "epoch": 1.53, "grad_norm": 0.752513548557085, "learning_rate": 1.3967465478569075e-06, "loss": 0.4927, "step": 11985 }, { "epoch": 1.53, "grad_norm": 0.6143365125200207, "learning_rate": 1.3960314275466768e-06, "loss": 0.4491, "step": 11986 }, { "epoch": 1.53, "grad_norm": 0.7371996943701631, "learning_rate": 1.3953164606458724e-06, "loss": 0.5085, "step": 11987 }, { "epoch": 1.53, "grad_norm": 0.776388144686789, "learning_rate": 1.3946016471849267e-06, "loss": 0.5261, "step": 11988 }, { "epoch": 1.53, "grad_norm": 0.5953005002791292, "learning_rate": 1.3938869871942683e-06, "loss": 0.4367, "step": 11989 }, { "epoch": 1.53, "grad_norm": 0.6058115728198914, "learning_rate": 1.3931724807043161e-06, "loss": 0.4764, "step": 11990 }, { "epoch": 1.53, "grad_norm": 0.630541521661115, "learning_rate": 1.3924581277454873e-06, "loss": 0.4654, "step": 11991 }, { "epoch": 1.53, "grad_norm": 0.7763509580356418, "learning_rate": 1.3917439283481865e-06, "loss": 0.5119, "step": 11992 }, { "epoch": 1.53, "grad_norm": 0.6724843055458318, "learning_rate": 1.3910298825428175e-06, "loss": 0.482, "step": 11993 }, { "epoch": 1.53, "grad_norm": 0.6716993553319668, "learning_rate": 1.3903159903597728e-06, "loss": 0.4846, "step": 11994 }, { "epoch": 1.53, "grad_norm": 0.9784978355260202, "learning_rate": 1.3896022518294428e-06, "loss": 0.5521, "step": 11995 }, { "epoch": 1.53, "grad_norm": 0.7067033772717441, "learning_rate": 1.388888666982206e-06, "loss": 0.5196, "step": 11996 }, { "epoch": 1.53, "grad_norm": 0.7225292070001212, "learning_rate": 1.388175235848439e-06, "loss": 0.4127, "step": 11997 }, { "epoch": 1.53, "grad_norm": 0.5543762154454209, "learning_rate": 1.387461958458511e-06, "loss": 0.3994, "step": 11998 }, { "epoch": 1.53, "grad_norm": 0.599151813254321, "learning_rate": 1.386748834842785e-06, "loss": 0.4379, "step": 11999 }, { "epoch": 1.53, "grad_norm": 0.5865043355783855, "learning_rate": 1.386035865031613e-06, "loss": 0.4301, "step": 12000 }, { "epoch": 1.53, "grad_norm": 0.5848885818649584, "learning_rate": 1.385323049055347e-06, "loss": 0.4419, "step": 12001 }, { "epoch": 1.53, "grad_norm": 0.5912220308104572, "learning_rate": 1.3846103869443268e-06, "loss": 0.4203, "step": 12002 }, { "epoch": 1.53, "grad_norm": 0.6715663063868861, "learning_rate": 1.3838978787288904e-06, "loss": 0.4923, "step": 12003 }, { "epoch": 1.53, "grad_norm": 0.7121142360824272, "learning_rate": 1.3831855244393644e-06, "loss": 0.5126, "step": 12004 }, { "epoch": 1.53, "grad_norm": 0.7356118229738676, "learning_rate": 1.382473324106075e-06, "loss": 0.5172, "step": 12005 }, { "epoch": 1.53, "grad_norm": 0.719605730545862, "learning_rate": 1.3817612777593342e-06, "loss": 0.5322, "step": 12006 }, { "epoch": 1.53, "grad_norm": 0.9518142582230482, "learning_rate": 1.3810493854294555e-06, "loss": 0.5216, "step": 12007 }, { "epoch": 1.53, "grad_norm": 0.789281302342054, "learning_rate": 1.380337647146738e-06, "loss": 0.5278, "step": 12008 }, { "epoch": 1.53, "grad_norm": 0.764922050200863, "learning_rate": 1.3796260629414815e-06, "loss": 0.532, "step": 12009 }, { "epoch": 1.53, "grad_norm": 0.7132716800622939, "learning_rate": 1.3789146328439717e-06, "loss": 0.58, "step": 12010 }, { "epoch": 1.53, "grad_norm": 0.6035890525939783, "learning_rate": 1.3782033568844977e-06, "loss": 0.433, "step": 12011 }, { "epoch": 1.53, "grad_norm": 0.9175538265553989, "learning_rate": 1.3774922350933316e-06, "loss": 0.5188, "step": 12012 }, { "epoch": 1.53, "grad_norm": 0.7275564193382649, "learning_rate": 1.3767812675007468e-06, "loss": 0.4948, "step": 12013 }, { "epoch": 1.53, "grad_norm": 0.5811452290358411, "learning_rate": 1.376070454137004e-06, "loss": 0.4266, "step": 12014 }, { "epoch": 1.53, "grad_norm": 0.5887462201873143, "learning_rate": 1.3753597950323627e-06, "loss": 0.4516, "step": 12015 }, { "epoch": 1.53, "grad_norm": 0.6648455382005889, "learning_rate": 1.3746492902170704e-06, "loss": 0.4698, "step": 12016 }, { "epoch": 1.53, "grad_norm": 0.5956197504538495, "learning_rate": 1.3739389397213748e-06, "loss": 0.4127, "step": 12017 }, { "epoch": 1.53, "grad_norm": 0.5668148435660866, "learning_rate": 1.3732287435755098e-06, "loss": 0.4182, "step": 12018 }, { "epoch": 1.53, "grad_norm": 0.8327981422838105, "learning_rate": 1.3725187018097092e-06, "loss": 0.5063, "step": 12019 }, { "epoch": 1.53, "grad_norm": 0.6948335758715407, "learning_rate": 1.371808814454194e-06, "loss": 0.4724, "step": 12020 }, { "epoch": 1.53, "grad_norm": 0.6996454064694048, "learning_rate": 1.3710990815391856e-06, "loss": 0.4602, "step": 12021 }, { "epoch": 1.53, "grad_norm": 0.7545763732845558, "learning_rate": 1.370389503094891e-06, "loss": 0.5171, "step": 12022 }, { "epoch": 1.53, "grad_norm": 0.7948560121583169, "learning_rate": 1.3696800791515164e-06, "loss": 0.5113, "step": 12023 }, { "epoch": 1.53, "grad_norm": 0.5852654683234477, "learning_rate": 1.3689708097392618e-06, "loss": 0.4201, "step": 12024 }, { "epoch": 1.53, "grad_norm": 0.6401586278192057, "learning_rate": 1.3682616948883155e-06, "loss": 0.4478, "step": 12025 }, { "epoch": 1.53, "grad_norm": 0.6210172028731479, "learning_rate": 1.367552734628863e-06, "loss": 0.4775, "step": 12026 }, { "epoch": 1.53, "grad_norm": 0.8480376919665327, "learning_rate": 1.3668439289910845e-06, "loss": 0.5094, "step": 12027 }, { "epoch": 1.53, "grad_norm": 0.8376740122868973, "learning_rate": 1.366135278005149e-06, "loss": 0.5546, "step": 12028 }, { "epoch": 1.53, "grad_norm": 0.728657437150289, "learning_rate": 1.3654267817012234e-06, "loss": 0.533, "step": 12029 }, { "epoch": 1.53, "grad_norm": 0.6027753227818549, "learning_rate": 1.3647184401094644e-06, "loss": 0.48, "step": 12030 }, { "epoch": 1.53, "grad_norm": 0.7149601753124744, "learning_rate": 1.3640102532600258e-06, "loss": 0.476, "step": 12031 }, { "epoch": 1.53, "grad_norm": 0.6385823989025762, "learning_rate": 1.3633022211830504e-06, "loss": 0.4798, "step": 12032 }, { "epoch": 1.53, "grad_norm": 0.7472804729014835, "learning_rate": 1.3625943439086798e-06, "loss": 0.5726, "step": 12033 }, { "epoch": 1.53, "grad_norm": 0.7239753873664908, "learning_rate": 1.3618866214670428e-06, "loss": 0.5045, "step": 12034 }, { "epoch": 1.53, "grad_norm": 0.7304990435440841, "learning_rate": 1.3611790538882664e-06, "loss": 0.5283, "step": 12035 }, { "epoch": 1.53, "grad_norm": 0.5521157313800197, "learning_rate": 1.3604716412024714e-06, "loss": 0.4331, "step": 12036 }, { "epoch": 1.53, "grad_norm": 0.6036097608299229, "learning_rate": 1.3597643834397668e-06, "loss": 0.5122, "step": 12037 }, { "epoch": 1.53, "grad_norm": 0.7533718774539668, "learning_rate": 1.3590572806302616e-06, "loss": 0.5385, "step": 12038 }, { "epoch": 1.53, "grad_norm": 0.6567158855842666, "learning_rate": 1.3583503328040516e-06, "loss": 0.4663, "step": 12039 }, { "epoch": 1.53, "grad_norm": 0.6385511712526343, "learning_rate": 1.3576435399912325e-06, "loss": 0.5105, "step": 12040 }, { "epoch": 1.53, "grad_norm": 0.5332103668731598, "learning_rate": 1.3569369022218865e-06, "loss": 0.4204, "step": 12041 }, { "epoch": 1.53, "grad_norm": 0.6240753870255737, "learning_rate": 1.3562304195260972e-06, "loss": 0.5035, "step": 12042 }, { "epoch": 1.53, "grad_norm": 0.7432969870032272, "learning_rate": 1.355524091933933e-06, "loss": 0.4922, "step": 12043 }, { "epoch": 1.53, "grad_norm": 0.6478422187972719, "learning_rate": 1.354817919475464e-06, "loss": 0.4699, "step": 12044 }, { "epoch": 1.53, "grad_norm": 0.8340793949355882, "learning_rate": 1.3541119021807463e-06, "loss": 0.4788, "step": 12045 }, { "epoch": 1.53, "grad_norm": 0.7354849173318426, "learning_rate": 1.353406040079836e-06, "loss": 0.5046, "step": 12046 }, { "epoch": 1.53, "grad_norm": 0.7933718921371866, "learning_rate": 1.3527003332027744e-06, "loss": 0.5838, "step": 12047 }, { "epoch": 1.53, "grad_norm": 0.7728542798497595, "learning_rate": 1.3519947815796074e-06, "loss": 0.5198, "step": 12048 }, { "epoch": 1.54, "grad_norm": 0.7776108974327598, "learning_rate": 1.351289385240364e-06, "loss": 0.4693, "step": 12049 }, { "epoch": 1.54, "grad_norm": 0.5559247212531438, "learning_rate": 1.3505841442150736e-06, "loss": 0.4042, "step": 12050 }, { "epoch": 1.54, "grad_norm": 0.6615737814492144, "learning_rate": 1.3498790585337523e-06, "loss": 0.5017, "step": 12051 }, { "epoch": 1.54, "grad_norm": 0.7068452835375392, "learning_rate": 1.3491741282264175e-06, "loss": 0.5079, "step": 12052 }, { "epoch": 1.54, "grad_norm": 0.7972622619836104, "learning_rate": 1.3484693533230715e-06, "loss": 0.5144, "step": 12053 }, { "epoch": 1.54, "grad_norm": 0.8618299790565459, "learning_rate": 1.3477647338537187e-06, "loss": 0.5675, "step": 12054 }, { "epoch": 1.54, "grad_norm": 0.5667896470631263, "learning_rate": 1.3470602698483481e-06, "loss": 0.4681, "step": 12055 }, { "epoch": 1.54, "grad_norm": 0.6748915693617862, "learning_rate": 1.3463559613369503e-06, "loss": 0.465, "step": 12056 }, { "epoch": 1.54, "grad_norm": 0.6885306501403046, "learning_rate": 1.345651808349503e-06, "loss": 0.4745, "step": 12057 }, { "epoch": 1.54, "grad_norm": 0.7120451344162045, "learning_rate": 1.3449478109159815e-06, "loss": 0.5033, "step": 12058 }, { "epoch": 1.54, "grad_norm": 0.6966980436604581, "learning_rate": 1.3442439690663489e-06, "loss": 0.4948, "step": 12059 }, { "epoch": 1.54, "grad_norm": 0.7381123997547461, "learning_rate": 1.343540282830571e-06, "loss": 0.4995, "step": 12060 }, { "epoch": 1.54, "grad_norm": 0.8350715521178228, "learning_rate": 1.3428367522385982e-06, "loss": 0.5494, "step": 12061 }, { "epoch": 1.54, "grad_norm": 0.5698619993427309, "learning_rate": 1.3421333773203793e-06, "loss": 0.4315, "step": 12062 }, { "epoch": 1.54, "grad_norm": 0.7480622567180749, "learning_rate": 1.341430158105852e-06, "loss": 0.4323, "step": 12063 }, { "epoch": 1.54, "grad_norm": 0.7407455800193824, "learning_rate": 1.3407270946249534e-06, "loss": 0.4965, "step": 12064 }, { "epoch": 1.54, "grad_norm": 0.7240674754276362, "learning_rate": 1.340024186907608e-06, "loss": 0.528, "step": 12065 }, { "epoch": 1.54, "grad_norm": 0.8642763106818814, "learning_rate": 1.3393214349837386e-06, "loss": 0.4856, "step": 12066 }, { "epoch": 1.54, "grad_norm": 0.7394800429468008, "learning_rate": 1.338618838883256e-06, "loss": 0.4749, "step": 12067 }, { "epoch": 1.54, "grad_norm": 0.6150551784682666, "learning_rate": 1.337916398636071e-06, "loss": 0.42, "step": 12068 }, { "epoch": 1.54, "grad_norm": 0.6873638367457529, "learning_rate": 1.3372141142720812e-06, "loss": 0.4374, "step": 12069 }, { "epoch": 1.54, "grad_norm": 0.6641941312641578, "learning_rate": 1.3365119858211834e-06, "loss": 0.4841, "step": 12070 }, { "epoch": 1.54, "grad_norm": 0.6623330608196557, "learning_rate": 1.3358100133132618e-06, "loss": 0.5534, "step": 12071 }, { "epoch": 1.54, "grad_norm": 0.7092752234689075, "learning_rate": 1.3351081967781993e-06, "loss": 0.5103, "step": 12072 }, { "epoch": 1.54, "grad_norm": 0.7222241989862542, "learning_rate": 1.3344065362458692e-06, "loss": 0.5139, "step": 12073 }, { "epoch": 1.54, "grad_norm": 0.7366382858788794, "learning_rate": 1.333705031746141e-06, "loss": 0.5857, "step": 12074 }, { "epoch": 1.54, "grad_norm": 0.7279613774005129, "learning_rate": 1.3330036833088722e-06, "loss": 0.4768, "step": 12075 }, { "epoch": 1.54, "grad_norm": 0.7934915332132335, "learning_rate": 1.3323024909639198e-06, "loss": 0.4831, "step": 12076 }, { "epoch": 1.54, "grad_norm": 0.6530731278777971, "learning_rate": 1.3316014547411287e-06, "loss": 0.5131, "step": 12077 }, { "epoch": 1.54, "grad_norm": 0.7010186099461464, "learning_rate": 1.330900574670343e-06, "loss": 0.5265, "step": 12078 }, { "epoch": 1.54, "grad_norm": 0.7777954514611753, "learning_rate": 1.3301998507813935e-06, "loss": 0.5841, "step": 12079 }, { "epoch": 1.54, "grad_norm": 0.8880143852487065, "learning_rate": 1.3294992831041103e-06, "loss": 0.5229, "step": 12080 }, { "epoch": 1.54, "grad_norm": 0.7352238603745523, "learning_rate": 1.3287988716683125e-06, "loss": 0.536, "step": 12081 }, { "epoch": 1.54, "grad_norm": 1.228604763000831, "learning_rate": 1.3280986165038161e-06, "loss": 0.5556, "step": 12082 }, { "epoch": 1.54, "grad_norm": 0.7935743611267209, "learning_rate": 1.327398517640427e-06, "loss": 0.5521, "step": 12083 }, { "epoch": 1.54, "grad_norm": 0.791572021738493, "learning_rate": 1.3266985751079469e-06, "loss": 0.5091, "step": 12084 }, { "epoch": 1.54, "grad_norm": 0.5683427860993228, "learning_rate": 1.3259987889361719e-06, "loss": 0.497, "step": 12085 }, { "epoch": 1.54, "grad_norm": 0.6352459097490646, "learning_rate": 1.3252991591548864e-06, "loss": 0.4432, "step": 12086 }, { "epoch": 1.54, "grad_norm": 0.8548758028703562, "learning_rate": 1.3245996857938725e-06, "loss": 0.5622, "step": 12087 }, { "epoch": 1.54, "grad_norm": 0.8599568683874101, "learning_rate": 1.3239003688829071e-06, "loss": 0.5393, "step": 12088 }, { "epoch": 1.54, "grad_norm": 0.7292897431982562, "learning_rate": 1.3232012084517543e-06, "loss": 0.5114, "step": 12089 }, { "epoch": 1.54, "grad_norm": 0.6230302240098855, "learning_rate": 1.3225022045301782e-06, "loss": 0.4503, "step": 12090 }, { "epoch": 1.54, "grad_norm": 0.6487511134507169, "learning_rate": 1.3218033571479305e-06, "loss": 0.4629, "step": 12091 }, { "epoch": 1.54, "grad_norm": 0.5570290425026649, "learning_rate": 1.321104666334761e-06, "loss": 0.4442, "step": 12092 }, { "epoch": 1.54, "grad_norm": 0.7207139833404483, "learning_rate": 1.3204061321204086e-06, "loss": 0.5001, "step": 12093 }, { "epoch": 1.54, "grad_norm": 0.9710680291592123, "learning_rate": 1.3197077545346104e-06, "loss": 0.5043, "step": 12094 }, { "epoch": 1.54, "grad_norm": 0.73662800335934, "learning_rate": 1.3190095336070907e-06, "loss": 0.4577, "step": 12095 }, { "epoch": 1.54, "grad_norm": 0.6377225157105362, "learning_rate": 1.3183114693675724e-06, "loss": 0.4667, "step": 12096 }, { "epoch": 1.54, "grad_norm": 0.7969274427891757, "learning_rate": 1.317613561845772e-06, "loss": 0.5088, "step": 12097 }, { "epoch": 1.54, "grad_norm": 0.6485392494273158, "learning_rate": 1.3169158110713925e-06, "loss": 0.4531, "step": 12098 }, { "epoch": 1.54, "grad_norm": 0.6979739127371077, "learning_rate": 1.31621821707414e-06, "loss": 0.4848, "step": 12099 }, { "epoch": 1.54, "grad_norm": 0.6320257066522543, "learning_rate": 1.3155207798837044e-06, "loss": 0.4312, "step": 12100 }, { "epoch": 1.54, "grad_norm": 0.6272004451811362, "learning_rate": 1.3148234995297764e-06, "loss": 0.4429, "step": 12101 }, { "epoch": 1.54, "grad_norm": 0.6848386892239477, "learning_rate": 1.3141263760420347e-06, "loss": 0.4592, "step": 12102 }, { "epoch": 1.54, "grad_norm": 0.6697016269295136, "learning_rate": 1.3134294094501566e-06, "loss": 0.4783, "step": 12103 }, { "epoch": 1.54, "grad_norm": 0.6369010563225966, "learning_rate": 1.3127325997838063e-06, "loss": 0.4638, "step": 12104 }, { "epoch": 1.54, "grad_norm": 0.7203844643870734, "learning_rate": 1.3120359470726478e-06, "loss": 0.5203, "step": 12105 }, { "epoch": 1.54, "grad_norm": 0.8189422981478071, "learning_rate": 1.3113394513463324e-06, "loss": 0.5152, "step": 12106 }, { "epoch": 1.54, "grad_norm": 0.6800999476337117, "learning_rate": 1.310643112634511e-06, "loss": 0.4665, "step": 12107 }, { "epoch": 1.54, "grad_norm": 0.7594970616419042, "learning_rate": 1.3099469309668211e-06, "loss": 0.5141, "step": 12108 }, { "epoch": 1.54, "grad_norm": 0.6256236954318253, "learning_rate": 1.3092509063728987e-06, "loss": 0.4213, "step": 12109 }, { "epoch": 1.54, "grad_norm": 0.6274304158933709, "learning_rate": 1.3085550388823714e-06, "loss": 0.4504, "step": 12110 }, { "epoch": 1.54, "grad_norm": 0.624608350182335, "learning_rate": 1.3078593285248609e-06, "loss": 0.5172, "step": 12111 }, { "epoch": 1.54, "grad_norm": 0.7195476007932664, "learning_rate": 1.3071637753299792e-06, "loss": 0.5353, "step": 12112 }, { "epoch": 1.54, "grad_norm": 0.6374266275720509, "learning_rate": 1.306468379327337e-06, "loss": 0.4537, "step": 12113 }, { "epoch": 1.54, "grad_norm": 0.8701038644384377, "learning_rate": 1.3057731405465307e-06, "loss": 0.5198, "step": 12114 }, { "epoch": 1.54, "grad_norm": 0.7421810390450814, "learning_rate": 1.3050780590171591e-06, "loss": 0.4696, "step": 12115 }, { "epoch": 1.54, "grad_norm": 0.6279326902384634, "learning_rate": 1.3043831347688052e-06, "loss": 0.4944, "step": 12116 }, { "epoch": 1.54, "grad_norm": 0.669829796420056, "learning_rate": 1.303688367831053e-06, "loss": 0.4586, "step": 12117 }, { "epoch": 1.54, "grad_norm": 0.5215844783350656, "learning_rate": 1.3029937582334745e-06, "loss": 0.3993, "step": 12118 }, { "epoch": 1.54, "grad_norm": 0.6719171014645522, "learning_rate": 1.3022993060056387e-06, "loss": 0.425, "step": 12119 }, { "epoch": 1.54, "grad_norm": 0.6393278698691742, "learning_rate": 1.3016050111771038e-06, "loss": 0.4525, "step": 12120 }, { "epoch": 1.54, "grad_norm": 0.6941019349828709, "learning_rate": 1.3009108737774255e-06, "loss": 0.4765, "step": 12121 }, { "epoch": 1.54, "grad_norm": 0.7764509877990751, "learning_rate": 1.3002168938361503e-06, "loss": 0.5139, "step": 12122 }, { "epoch": 1.54, "grad_norm": 0.6198335544671932, "learning_rate": 1.2995230713828211e-06, "loss": 0.4554, "step": 12123 }, { "epoch": 1.54, "grad_norm": 0.6931633446528205, "learning_rate": 1.2988294064469682e-06, "loss": 0.4452, "step": 12124 }, { "epoch": 1.54, "grad_norm": 0.6176694683432442, "learning_rate": 1.298135899058121e-06, "loss": 0.4779, "step": 12125 }, { "epoch": 1.54, "grad_norm": 0.6253982772440863, "learning_rate": 1.2974425492457987e-06, "loss": 0.4577, "step": 12126 }, { "epoch": 1.54, "grad_norm": 0.6111019816293698, "learning_rate": 1.2967493570395162e-06, "loss": 0.4281, "step": 12127 }, { "epoch": 1.55, "grad_norm": 0.7271373161459651, "learning_rate": 1.2960563224687782e-06, "loss": 0.5598, "step": 12128 }, { "epoch": 1.55, "grad_norm": 0.8661162994084268, "learning_rate": 1.2953634455630882e-06, "loss": 0.4999, "step": 12129 }, { "epoch": 1.55, "grad_norm": 0.6093693891595487, "learning_rate": 1.2946707263519358e-06, "loss": 0.4277, "step": 12130 }, { "epoch": 1.55, "grad_norm": 0.6750268666595707, "learning_rate": 1.2939781648648126e-06, "loss": 0.4693, "step": 12131 }, { "epoch": 1.55, "grad_norm": 0.9375967774473747, "learning_rate": 1.2932857611311938e-06, "loss": 0.4717, "step": 12132 }, { "epoch": 1.55, "grad_norm": 0.6979174523347548, "learning_rate": 1.2925935151805574e-06, "loss": 0.4825, "step": 12133 }, { "epoch": 1.55, "grad_norm": 0.7295567230482443, "learning_rate": 1.2919014270423642e-06, "loss": 0.4739, "step": 12134 }, { "epoch": 1.55, "grad_norm": 0.5946189045867745, "learning_rate": 1.2912094967460814e-06, "loss": 0.5411, "step": 12135 }, { "epoch": 1.55, "grad_norm": 0.7320500969833951, "learning_rate": 1.2905177243211569e-06, "loss": 0.5737, "step": 12136 }, { "epoch": 1.55, "grad_norm": 0.8188506560258125, "learning_rate": 1.2898261097970405e-06, "loss": 0.5199, "step": 12137 }, { "epoch": 1.55, "grad_norm": 0.6447054120337272, "learning_rate": 1.2891346532031696e-06, "loss": 0.4992, "step": 12138 }, { "epoch": 1.55, "grad_norm": 0.896678811353179, "learning_rate": 1.2884433545689795e-06, "loss": 0.4813, "step": 12139 }, { "epoch": 1.55, "grad_norm": 0.609924349573137, "learning_rate": 1.2877522139238935e-06, "loss": 0.4727, "step": 12140 }, { "epoch": 1.55, "grad_norm": 0.7305691380405754, "learning_rate": 1.2870612312973352e-06, "loss": 0.5136, "step": 12141 }, { "epoch": 1.55, "grad_norm": 0.5423139370975589, "learning_rate": 1.2863704067187133e-06, "loss": 0.4303, "step": 12142 }, { "epoch": 1.55, "grad_norm": 0.6298921714649235, "learning_rate": 1.285679740217438e-06, "loss": 0.4792, "step": 12143 }, { "epoch": 1.55, "grad_norm": 0.7067523917070067, "learning_rate": 1.2849892318229052e-06, "loss": 0.4837, "step": 12144 }, { "epoch": 1.55, "grad_norm": 0.5361793400098595, "learning_rate": 1.2842988815645112e-06, "loss": 0.4301, "step": 12145 }, { "epoch": 1.55, "grad_norm": 0.7016427025651549, "learning_rate": 1.2836086894716383e-06, "loss": 0.5035, "step": 12146 }, { "epoch": 1.55, "grad_norm": 0.7081089744379102, "learning_rate": 1.2829186555736677e-06, "loss": 0.48, "step": 12147 }, { "epoch": 1.55, "grad_norm": 0.6380552995209654, "learning_rate": 1.2822287798999732e-06, "loss": 0.4744, "step": 12148 }, { "epoch": 1.55, "grad_norm": 0.6667388960910207, "learning_rate": 1.2815390624799178e-06, "loss": 0.4574, "step": 12149 }, { "epoch": 1.55, "grad_norm": 0.6598187171809405, "learning_rate": 1.2808495033428626e-06, "loss": 0.4224, "step": 12150 }, { "epoch": 1.55, "grad_norm": 0.7255360235814536, "learning_rate": 1.2801601025181603e-06, "loss": 0.4649, "step": 12151 }, { "epoch": 1.55, "grad_norm": 0.8093272920090693, "learning_rate": 1.2794708600351541e-06, "loss": 0.4817, "step": 12152 }, { "epoch": 1.55, "grad_norm": 0.66882049106186, "learning_rate": 1.2787817759231857e-06, "loss": 0.4617, "step": 12153 }, { "epoch": 1.55, "grad_norm": 0.7268781763045137, "learning_rate": 1.2780928502115842e-06, "loss": 0.4515, "step": 12154 }, { "epoch": 1.55, "grad_norm": 0.6945349554903057, "learning_rate": 1.277404082929678e-06, "loss": 0.4325, "step": 12155 }, { "epoch": 1.55, "grad_norm": 0.7655594386980333, "learning_rate": 1.2767154741067833e-06, "loss": 0.5102, "step": 12156 }, { "epoch": 1.55, "grad_norm": 0.8796861962807149, "learning_rate": 1.276027023772214e-06, "loss": 0.5805, "step": 12157 }, { "epoch": 1.55, "grad_norm": 0.7390861518780986, "learning_rate": 1.2753387319552724e-06, "loss": 0.552, "step": 12158 }, { "epoch": 1.55, "grad_norm": 0.7806630766164695, "learning_rate": 1.2746505986852592e-06, "loss": 0.5525, "step": 12159 }, { "epoch": 1.55, "grad_norm": 0.7900047159400655, "learning_rate": 1.2739626239914671e-06, "loss": 0.5096, "step": 12160 }, { "epoch": 1.55, "grad_norm": 0.6624885154875821, "learning_rate": 1.2732748079031776e-06, "loss": 0.4226, "step": 12161 }, { "epoch": 1.55, "grad_norm": 0.5547945150624433, "learning_rate": 1.2725871504496717e-06, "loss": 0.4404, "step": 12162 }, { "epoch": 1.55, "grad_norm": 0.6645488152446583, "learning_rate": 1.271899651660219e-06, "loss": 0.4701, "step": 12163 }, { "epoch": 1.55, "grad_norm": 0.7403970527336247, "learning_rate": 1.2712123115640861e-06, "loss": 0.4832, "step": 12164 }, { "epoch": 1.55, "grad_norm": 0.733369267388733, "learning_rate": 1.270525130190528e-06, "loss": 0.509, "step": 12165 }, { "epoch": 1.55, "grad_norm": 0.7104349050000065, "learning_rate": 1.2698381075687987e-06, "loss": 0.496, "step": 12166 }, { "epoch": 1.55, "grad_norm": 0.5857822094644917, "learning_rate": 1.26915124372814e-06, "loss": 0.4845, "step": 12167 }, { "epoch": 1.55, "grad_norm": 0.7287814478737366, "learning_rate": 1.2684645386977922e-06, "loss": 0.5138, "step": 12168 }, { "epoch": 1.55, "grad_norm": 0.6654302738114459, "learning_rate": 1.2677779925069833e-06, "loss": 0.4913, "step": 12169 }, { "epoch": 1.55, "grad_norm": 0.7193514191654964, "learning_rate": 1.2670916051849402e-06, "loss": 0.5084, "step": 12170 }, { "epoch": 1.55, "grad_norm": 0.6400433094598214, "learning_rate": 1.2664053767608758e-06, "loss": 0.4938, "step": 12171 }, { "epoch": 1.55, "grad_norm": 0.691141547993291, "learning_rate": 1.2657193072640067e-06, "loss": 0.4869, "step": 12172 }, { "epoch": 1.55, "grad_norm": 0.6072485094679723, "learning_rate": 1.2650333967235317e-06, "loss": 0.4666, "step": 12173 }, { "epoch": 1.55, "grad_norm": 0.740002960864857, "learning_rate": 1.2643476451686521e-06, "loss": 0.4887, "step": 12174 }, { "epoch": 1.55, "grad_norm": 0.5621174156379379, "learning_rate": 1.2636620526285538e-06, "loss": 0.38, "step": 12175 }, { "epoch": 1.55, "grad_norm": 0.721917331125709, "learning_rate": 1.2629766191324239e-06, "loss": 0.4522, "step": 12176 }, { "epoch": 1.55, "grad_norm": 0.6174449980458677, "learning_rate": 1.262291344709436e-06, "loss": 0.4421, "step": 12177 }, { "epoch": 1.55, "grad_norm": 0.7523593015544058, "learning_rate": 1.2616062293887627e-06, "loss": 0.485, "step": 12178 }, { "epoch": 1.55, "grad_norm": 0.7731674832680517, "learning_rate": 1.2609212731995646e-06, "loss": 0.5184, "step": 12179 }, { "epoch": 1.55, "grad_norm": 0.6068576554073747, "learning_rate": 1.2602364761710011e-06, "loss": 0.5015, "step": 12180 }, { "epoch": 1.55, "grad_norm": 0.573802629544794, "learning_rate": 1.2595518383322192e-06, "loss": 0.4576, "step": 12181 }, { "epoch": 1.55, "grad_norm": 0.8411404592601831, "learning_rate": 1.258867359712364e-06, "loss": 0.5467, "step": 12182 }, { "epoch": 1.55, "grad_norm": 0.706107522245884, "learning_rate": 1.2581830403405671e-06, "loss": 0.4851, "step": 12183 }, { "epoch": 1.55, "grad_norm": 0.7747634621182083, "learning_rate": 1.2574988802459642e-06, "loss": 0.5434, "step": 12184 }, { "epoch": 1.55, "grad_norm": 0.7789075493157012, "learning_rate": 1.2568148794576733e-06, "loss": 0.4821, "step": 12185 }, { "epoch": 1.55, "grad_norm": 0.6027091414626022, "learning_rate": 1.2561310380048131e-06, "loss": 0.4282, "step": 12186 }, { "epoch": 1.55, "grad_norm": 0.7379954023631474, "learning_rate": 1.2554473559164892e-06, "loss": 0.5152, "step": 12187 }, { "epoch": 1.55, "grad_norm": 0.6523635331959803, "learning_rate": 1.2547638332218064e-06, "loss": 0.4539, "step": 12188 }, { "epoch": 1.55, "grad_norm": 0.5473156922037804, "learning_rate": 1.2540804699498587e-06, "loss": 0.4383, "step": 12189 }, { "epoch": 1.55, "grad_norm": 0.6394321915948727, "learning_rate": 1.2533972661297362e-06, "loss": 0.455, "step": 12190 }, { "epoch": 1.55, "grad_norm": 0.747376378955863, "learning_rate": 1.252714221790518e-06, "loss": 0.5291, "step": 12191 }, { "epoch": 1.55, "grad_norm": 0.7409717423223107, "learning_rate": 1.2520313369612824e-06, "loss": 0.5398, "step": 12192 }, { "epoch": 1.55, "grad_norm": 0.7833992243321208, "learning_rate": 1.2513486116710943e-06, "loss": 0.5056, "step": 12193 }, { "epoch": 1.55, "grad_norm": 0.6571035268002016, "learning_rate": 1.2506660459490184e-06, "loss": 0.4778, "step": 12194 }, { "epoch": 1.55, "grad_norm": 0.7153858901775363, "learning_rate": 1.2499836398241067e-06, "loss": 0.4946, "step": 12195 }, { "epoch": 1.55, "grad_norm": 0.8505939561888495, "learning_rate": 1.2493013933254077e-06, "loss": 0.5191, "step": 12196 }, { "epoch": 1.55, "grad_norm": 0.5799425483289186, "learning_rate": 1.248619306481963e-06, "loss": 0.4882, "step": 12197 }, { "epoch": 1.55, "grad_norm": 0.682964603679965, "learning_rate": 1.2479373793228078e-06, "loss": 0.484, "step": 12198 }, { "epoch": 1.55, "grad_norm": 0.7005776746222734, "learning_rate": 1.2472556118769674e-06, "loss": 0.4686, "step": 12199 }, { "epoch": 1.55, "grad_norm": 1.2399686727685195, "learning_rate": 1.2465740041734654e-06, "loss": 0.4967, "step": 12200 }, { "epoch": 1.55, "grad_norm": 0.6339316133153569, "learning_rate": 1.2458925562413115e-06, "loss": 0.4796, "step": 12201 }, { "epoch": 1.55, "grad_norm": 0.6963249725523168, "learning_rate": 1.2452112681095169e-06, "loss": 0.4796, "step": 12202 }, { "epoch": 1.55, "grad_norm": 0.960494938755049, "learning_rate": 1.2445301398070787e-06, "loss": 0.4817, "step": 12203 }, { "epoch": 1.55, "grad_norm": 0.8040869860575909, "learning_rate": 1.243849171362993e-06, "loss": 0.4763, "step": 12204 }, { "epoch": 1.55, "grad_norm": 0.5860982728160293, "learning_rate": 1.2431683628062435e-06, "loss": 0.4433, "step": 12205 }, { "epoch": 1.56, "grad_norm": 0.6153230979369951, "learning_rate": 1.2424877141658138e-06, "loss": 0.4559, "step": 12206 }, { "epoch": 1.56, "grad_norm": 1.0474645249480654, "learning_rate": 1.2418072254706725e-06, "loss": 0.5504, "step": 12207 }, { "epoch": 1.56, "grad_norm": 0.7923641285967316, "learning_rate": 1.2411268967497891e-06, "loss": 0.5453, "step": 12208 }, { "epoch": 1.56, "grad_norm": 0.8149024018567816, "learning_rate": 1.2404467280321226e-06, "loss": 0.5346, "step": 12209 }, { "epoch": 1.56, "grad_norm": 0.5723444647728845, "learning_rate": 1.239766719346624e-06, "loss": 0.461, "step": 12210 }, { "epoch": 1.56, "grad_norm": 0.8341600617532505, "learning_rate": 1.2390868707222404e-06, "loss": 0.4962, "step": 12211 }, { "epoch": 1.56, "grad_norm": 0.8934393954563082, "learning_rate": 1.2384071821879118e-06, "loss": 0.6078, "step": 12212 }, { "epoch": 1.56, "grad_norm": 0.6313462385399999, "learning_rate": 1.2377276537725674e-06, "loss": 0.4458, "step": 12213 }, { "epoch": 1.56, "grad_norm": 0.7544559827012485, "learning_rate": 1.2370482855051363e-06, "loss": 0.492, "step": 12214 }, { "epoch": 1.56, "grad_norm": 5.065970038706937, "learning_rate": 1.2363690774145331e-06, "loss": 0.4907, "step": 12215 }, { "epoch": 1.56, "grad_norm": 0.8276724453287353, "learning_rate": 1.235690029529673e-06, "loss": 0.5103, "step": 12216 }, { "epoch": 1.56, "grad_norm": 0.5790238496706482, "learning_rate": 1.2350111418794581e-06, "loss": 0.4417, "step": 12217 }, { "epoch": 1.56, "grad_norm": 0.8600910722503226, "learning_rate": 1.2343324144927888e-06, "loss": 0.4927, "step": 12218 }, { "epoch": 1.56, "grad_norm": 0.5907564171565781, "learning_rate": 1.2336538473985543e-06, "loss": 0.4183, "step": 12219 }, { "epoch": 1.56, "grad_norm": 0.6189544718425022, "learning_rate": 1.2329754406256394e-06, "loss": 0.4978, "step": 12220 }, { "epoch": 1.56, "grad_norm": 0.7975705867872511, "learning_rate": 1.2322971942029243e-06, "loss": 0.5121, "step": 12221 }, { "epoch": 1.56, "grad_norm": 0.6866523240692386, "learning_rate": 1.231619108159276e-06, "loss": 0.4694, "step": 12222 }, { "epoch": 1.56, "grad_norm": 0.6358226451343981, "learning_rate": 1.2309411825235617e-06, "loss": 0.482, "step": 12223 }, { "epoch": 1.56, "grad_norm": 0.8089400293962615, "learning_rate": 1.2302634173246358e-06, "loss": 0.5263, "step": 12224 }, { "epoch": 1.56, "grad_norm": 0.6214155899665389, "learning_rate": 1.2295858125913512e-06, "loss": 0.5091, "step": 12225 }, { "epoch": 1.56, "grad_norm": 0.6031265495919599, "learning_rate": 1.2289083683525483e-06, "loss": 0.4401, "step": 12226 }, { "epoch": 1.56, "grad_norm": 0.63877342952, "learning_rate": 1.228231084637067e-06, "loss": 0.4719, "step": 12227 }, { "epoch": 1.56, "grad_norm": 0.6960781247040517, "learning_rate": 1.2275539614737348e-06, "loss": 0.4968, "step": 12228 }, { "epoch": 1.56, "grad_norm": 0.9124283585514914, "learning_rate": 1.2268769988913764e-06, "loss": 0.545, "step": 12229 }, { "epoch": 1.56, "grad_norm": 0.7376476796241536, "learning_rate": 1.2262001969188053e-06, "loss": 0.5467, "step": 12230 }, { "epoch": 1.56, "grad_norm": 0.6889677320960865, "learning_rate": 1.2255235555848343e-06, "loss": 0.4724, "step": 12231 }, { "epoch": 1.56, "grad_norm": 0.5825188662433707, "learning_rate": 1.224847074918263e-06, "loss": 0.4608, "step": 12232 }, { "epoch": 1.56, "grad_norm": 0.7048026441877644, "learning_rate": 1.2241707549478871e-06, "loss": 0.4785, "step": 12233 }, { "epoch": 1.56, "grad_norm": 0.5520290040891466, "learning_rate": 1.223494595702497e-06, "loss": 0.4321, "step": 12234 }, { "epoch": 1.56, "grad_norm": 0.7635078504457856, "learning_rate": 1.2228185972108752e-06, "loss": 0.4321, "step": 12235 }, { "epoch": 1.56, "grad_norm": 0.6702286900424755, "learning_rate": 1.2221427595017937e-06, "loss": 0.4967, "step": 12236 }, { "epoch": 1.56, "grad_norm": 0.7664898124945465, "learning_rate": 1.2214670826040248e-06, "loss": 0.4925, "step": 12237 }, { "epoch": 1.56, "grad_norm": 0.5886259589065718, "learning_rate": 1.220791566546326e-06, "loss": 0.48, "step": 12238 }, { "epoch": 1.56, "grad_norm": 0.7133073687144766, "learning_rate": 1.220116211357455e-06, "loss": 0.4709, "step": 12239 }, { "epoch": 1.56, "grad_norm": 0.8109322452064744, "learning_rate": 1.2194410170661568e-06, "loss": 0.5375, "step": 12240 }, { "epoch": 1.56, "grad_norm": 0.7763990664744106, "learning_rate": 1.2187659837011745e-06, "loss": 0.492, "step": 12241 }, { "epoch": 1.56, "grad_norm": 0.6388958559167375, "learning_rate": 1.2180911112912403e-06, "loss": 0.4661, "step": 12242 }, { "epoch": 1.56, "grad_norm": 0.7111246062202005, "learning_rate": 1.2174163998650835e-06, "loss": 0.4833, "step": 12243 }, { "epoch": 1.56, "grad_norm": 0.9536204881522976, "learning_rate": 1.2167418494514216e-06, "loss": 0.5542, "step": 12244 }, { "epoch": 1.56, "grad_norm": 0.8598486618833837, "learning_rate": 1.2160674600789702e-06, "loss": 0.5104, "step": 12245 }, { "epoch": 1.56, "grad_norm": 0.5592071502892354, "learning_rate": 1.2153932317764344e-06, "loss": 0.4448, "step": 12246 }, { "epoch": 1.56, "grad_norm": 0.6762090853401066, "learning_rate": 1.214719164572517e-06, "loss": 0.4618, "step": 12247 }, { "epoch": 1.56, "grad_norm": 0.6621196571549484, "learning_rate": 1.2140452584959067e-06, "loss": 0.4419, "step": 12248 }, { "epoch": 1.56, "grad_norm": 0.7201659770954338, "learning_rate": 1.2133715135752933e-06, "loss": 0.5003, "step": 12249 }, { "epoch": 1.56, "grad_norm": 0.7905958278772297, "learning_rate": 1.2126979298393532e-06, "loss": 0.5032, "step": 12250 }, { "epoch": 1.56, "grad_norm": 0.5545105955600493, "learning_rate": 1.2120245073167607e-06, "loss": 0.4477, "step": 12251 }, { "epoch": 1.56, "grad_norm": 0.7228509096423295, "learning_rate": 1.2113512460361788e-06, "loss": 0.4944, "step": 12252 }, { "epoch": 1.56, "grad_norm": 0.6516111491556387, "learning_rate": 1.210678146026269e-06, "loss": 0.517, "step": 12253 }, { "epoch": 1.56, "grad_norm": 0.7040754583234186, "learning_rate": 1.2100052073156803e-06, "loss": 0.4979, "step": 12254 }, { "epoch": 1.56, "grad_norm": 0.5702443256820937, "learning_rate": 1.2093324299330606e-06, "loss": 0.4242, "step": 12255 }, { "epoch": 1.56, "grad_norm": 0.7148581287502968, "learning_rate": 1.2086598139070444e-06, "loss": 0.5263, "step": 12256 }, { "epoch": 1.56, "grad_norm": 0.7522776800168697, "learning_rate": 1.2079873592662656e-06, "loss": 0.5172, "step": 12257 }, { "epoch": 1.56, "grad_norm": 0.7328240058092368, "learning_rate": 1.2073150660393452e-06, "loss": 0.4801, "step": 12258 }, { "epoch": 1.56, "grad_norm": 0.7086362747789404, "learning_rate": 1.2066429342549047e-06, "loss": 0.5002, "step": 12259 }, { "epoch": 1.56, "grad_norm": 0.5770118136899973, "learning_rate": 1.2059709639415519e-06, "loss": 0.4536, "step": 12260 }, { "epoch": 1.56, "grad_norm": 0.8567307762240879, "learning_rate": 1.2052991551278926e-06, "loss": 0.5325, "step": 12261 }, { "epoch": 1.56, "grad_norm": 0.7285198587353507, "learning_rate": 1.2046275078425206e-06, "loss": 0.4854, "step": 12262 }, { "epoch": 1.56, "grad_norm": 0.814008326035246, "learning_rate": 1.2039560221140284e-06, "loss": 0.5133, "step": 12263 }, { "epoch": 1.56, "grad_norm": 0.6132030917438644, "learning_rate": 1.2032846979709971e-06, "loss": 0.4514, "step": 12264 }, { "epoch": 1.56, "grad_norm": 0.6151179344012118, "learning_rate": 1.2026135354420049e-06, "loss": 0.4591, "step": 12265 }, { "epoch": 1.56, "grad_norm": 0.8614182171488346, "learning_rate": 1.2019425345556178e-06, "loss": 0.5783, "step": 12266 }, { "epoch": 1.56, "grad_norm": 0.731729986840844, "learning_rate": 1.2012716953404024e-06, "loss": 0.5013, "step": 12267 }, { "epoch": 1.56, "grad_norm": 0.7083262765343553, "learning_rate": 1.20060101782491e-06, "loss": 0.4541, "step": 12268 }, { "epoch": 1.56, "grad_norm": 0.7869536204952674, "learning_rate": 1.1999305020376928e-06, "loss": 0.5394, "step": 12269 }, { "epoch": 1.56, "grad_norm": 0.7098628778365018, "learning_rate": 1.199260148007289e-06, "loss": 0.4993, "step": 12270 }, { "epoch": 1.56, "grad_norm": 0.9779743064297722, "learning_rate": 1.1985899557622356e-06, "loss": 0.4473, "step": 12271 }, { "epoch": 1.56, "grad_norm": 0.6306517125220557, "learning_rate": 1.197919925331062e-06, "loss": 0.4315, "step": 12272 }, { "epoch": 1.56, "grad_norm": 0.657878536756602, "learning_rate": 1.197250056742285e-06, "loss": 0.4817, "step": 12273 }, { "epoch": 1.56, "grad_norm": 0.8150331182773332, "learning_rate": 1.1965803500244223e-06, "loss": 0.5336, "step": 12274 }, { "epoch": 1.56, "grad_norm": 0.576517935312396, "learning_rate": 1.1959108052059808e-06, "loss": 0.4541, "step": 12275 }, { "epoch": 1.56, "grad_norm": 0.685286964473662, "learning_rate": 1.1952414223154596e-06, "loss": 0.5053, "step": 12276 }, { "epoch": 1.56, "grad_norm": 0.7724786047068295, "learning_rate": 1.1945722013813533e-06, "loss": 0.5551, "step": 12277 }, { "epoch": 1.56, "grad_norm": 0.5699813539226222, "learning_rate": 1.1939031424321474e-06, "loss": 0.4244, "step": 12278 }, { "epoch": 1.56, "grad_norm": 0.6021195497126282, "learning_rate": 1.193234245496323e-06, "loss": 0.4121, "step": 12279 }, { "epoch": 1.56, "grad_norm": 0.6747659771713505, "learning_rate": 1.1925655106023504e-06, "loss": 0.5038, "step": 12280 }, { "epoch": 1.56, "grad_norm": 0.8259943744587641, "learning_rate": 1.1918969377786994e-06, "loss": 0.5303, "step": 12281 }, { "epoch": 1.56, "grad_norm": 0.8066337394234543, "learning_rate": 1.1912285270538249e-06, "loss": 0.5155, "step": 12282 }, { "epoch": 1.56, "grad_norm": 0.752798197724141, "learning_rate": 1.190560278456181e-06, "loss": 0.5047, "step": 12283 }, { "epoch": 1.56, "grad_norm": 0.789961350096785, "learning_rate": 1.1898921920142141e-06, "loss": 0.5103, "step": 12284 }, { "epoch": 1.57, "grad_norm": 0.7737972876278362, "learning_rate": 1.1892242677563604e-06, "loss": 0.5134, "step": 12285 }, { "epoch": 1.57, "grad_norm": 0.6713788131551305, "learning_rate": 1.1885565057110526e-06, "loss": 0.4643, "step": 12286 }, { "epoch": 1.57, "grad_norm": 0.5782034324468629, "learning_rate": 1.1878889059067138e-06, "loss": 0.4339, "step": 12287 }, { "epoch": 1.57, "grad_norm": 0.6000646951720657, "learning_rate": 1.1872214683717642e-06, "loss": 0.4432, "step": 12288 }, { "epoch": 1.57, "grad_norm": 0.6456262753819807, "learning_rate": 1.1865541931346109e-06, "loss": 0.4726, "step": 12289 }, { "epoch": 1.57, "grad_norm": 0.6577292865925803, "learning_rate": 1.1858870802236617e-06, "loss": 0.4461, "step": 12290 }, { "epoch": 1.57, "grad_norm": 0.6154646667009805, "learning_rate": 1.1852201296673095e-06, "loss": 0.493, "step": 12291 }, { "epoch": 1.57, "grad_norm": 0.8955536617545964, "learning_rate": 1.184553341493947e-06, "loss": 0.5433, "step": 12292 }, { "epoch": 1.57, "grad_norm": 0.5887572308986995, "learning_rate": 1.1838867157319557e-06, "loss": 0.4367, "step": 12293 }, { "epoch": 1.57, "grad_norm": 0.6215599397344796, "learning_rate": 1.183220252409713e-06, "loss": 0.4444, "step": 12294 }, { "epoch": 1.57, "grad_norm": 0.7207980196646274, "learning_rate": 1.182553951555585e-06, "loss": 0.4125, "step": 12295 }, { "epoch": 1.57, "grad_norm": 0.7544128095779276, "learning_rate": 1.1818878131979395e-06, "loss": 0.5295, "step": 12296 }, { "epoch": 1.57, "grad_norm": 0.6326616278302227, "learning_rate": 1.1812218373651275e-06, "loss": 0.4982, "step": 12297 }, { "epoch": 1.57, "grad_norm": 0.9032419255020211, "learning_rate": 1.1805560240854996e-06, "loss": 0.5261, "step": 12298 }, { "epoch": 1.57, "grad_norm": 0.6438284088629768, "learning_rate": 1.1798903733873956e-06, "loss": 0.4654, "step": 12299 }, { "epoch": 1.57, "grad_norm": 0.6412076546907539, "learning_rate": 1.1792248852991528e-06, "loss": 0.5006, "step": 12300 }, { "epoch": 1.57, "grad_norm": 0.8398886798153344, "learning_rate": 1.178559559849095e-06, "loss": 0.485, "step": 12301 }, { "epoch": 1.57, "grad_norm": 1.0301155543178229, "learning_rate": 1.177894397065547e-06, "loss": 0.509, "step": 12302 }, { "epoch": 1.57, "grad_norm": 0.7438383857728639, "learning_rate": 1.1772293969768194e-06, "loss": 0.476, "step": 12303 }, { "epoch": 1.57, "grad_norm": 0.5920170994071529, "learning_rate": 1.1765645596112223e-06, "loss": 0.4707, "step": 12304 }, { "epoch": 1.57, "grad_norm": 0.6442716906660317, "learning_rate": 1.175899884997052e-06, "loss": 0.4757, "step": 12305 }, { "epoch": 1.57, "grad_norm": 0.6604699417277907, "learning_rate": 1.1752353731626054e-06, "loss": 0.4926, "step": 12306 }, { "epoch": 1.57, "grad_norm": 0.7577657271099134, "learning_rate": 1.1745710241361636e-06, "loss": 0.4773, "step": 12307 }, { "epoch": 1.57, "grad_norm": 0.6290041947152818, "learning_rate": 1.1739068379460117e-06, "loss": 0.4524, "step": 12308 }, { "epoch": 1.57, "grad_norm": 0.598084053270403, "learning_rate": 1.1732428146204177e-06, "loss": 0.4941, "step": 12309 }, { "epoch": 1.57, "grad_norm": 2.4746104902302317, "learning_rate": 1.1725789541876503e-06, "loss": 0.5317, "step": 12310 }, { "epoch": 1.57, "grad_norm": 0.7560179685909251, "learning_rate": 1.1719152566759651e-06, "loss": 0.4816, "step": 12311 }, { "epoch": 1.57, "grad_norm": 0.7401625904600152, "learning_rate": 1.1712517221136154e-06, "loss": 0.5294, "step": 12312 }, { "epoch": 1.57, "grad_norm": 0.5775663671288473, "learning_rate": 1.1705883505288435e-06, "loss": 0.4822, "step": 12313 }, { "epoch": 1.57, "grad_norm": 0.6846668159191066, "learning_rate": 1.1699251419498903e-06, "loss": 0.4567, "step": 12314 }, { "epoch": 1.57, "grad_norm": 0.6094747064656174, "learning_rate": 1.1692620964049829e-06, "loss": 0.4708, "step": 12315 }, { "epoch": 1.57, "grad_norm": 0.745866535285704, "learning_rate": 1.1685992139223485e-06, "loss": 0.5797, "step": 12316 }, { "epoch": 1.57, "grad_norm": 0.7158661218047141, "learning_rate": 1.1679364945302008e-06, "loss": 0.561, "step": 12317 }, { "epoch": 1.57, "grad_norm": 0.5943924057714223, "learning_rate": 1.1672739382567521e-06, "loss": 0.4979, "step": 12318 }, { "epoch": 1.57, "grad_norm": 0.696281153838572, "learning_rate": 1.166611545130203e-06, "loss": 0.452, "step": 12319 }, { "epoch": 1.57, "grad_norm": 0.5969914807153506, "learning_rate": 1.1659493151787504e-06, "loss": 0.4425, "step": 12320 }, { "epoch": 1.57, "grad_norm": 0.6153005680542691, "learning_rate": 1.165287248430584e-06, "loss": 0.4893, "step": 12321 }, { "epoch": 1.57, "grad_norm": 0.6914363929158839, "learning_rate": 1.1646253449138862e-06, "loss": 0.5262, "step": 12322 }, { "epoch": 1.57, "grad_norm": 0.7454848946133966, "learning_rate": 1.1639636046568303e-06, "loss": 0.4849, "step": 12323 }, { "epoch": 1.57, "grad_norm": 0.7727006329888226, "learning_rate": 1.1633020276875872e-06, "loss": 0.4812, "step": 12324 }, { "epoch": 1.57, "grad_norm": 0.7062118020565774, "learning_rate": 1.1626406140343144e-06, "loss": 0.5411, "step": 12325 }, { "epoch": 1.57, "grad_norm": 0.6504233707624628, "learning_rate": 1.1619793637251704e-06, "loss": 0.4672, "step": 12326 }, { "epoch": 1.57, "grad_norm": 0.5998094762486235, "learning_rate": 1.1613182767882986e-06, "loss": 0.4584, "step": 12327 }, { "epoch": 1.57, "grad_norm": 0.6126104167828043, "learning_rate": 1.1606573532518423e-06, "loss": 0.4369, "step": 12328 }, { "epoch": 1.57, "grad_norm": 0.6105905479812535, "learning_rate": 1.1599965931439322e-06, "loss": 0.4483, "step": 12329 }, { "epoch": 1.57, "grad_norm": 0.5258971739285336, "learning_rate": 1.159335996492698e-06, "loss": 0.4315, "step": 12330 }, { "epoch": 1.57, "grad_norm": 0.6945284173981037, "learning_rate": 1.158675563326256e-06, "loss": 0.5053, "step": 12331 }, { "epoch": 1.57, "grad_norm": 0.7137239497657692, "learning_rate": 1.1580152936727202e-06, "loss": 0.4823, "step": 12332 }, { "epoch": 1.57, "grad_norm": 0.6309054255922194, "learning_rate": 1.1573551875601974e-06, "loss": 0.5069, "step": 12333 }, { "epoch": 1.57, "grad_norm": 0.7964236534357664, "learning_rate": 1.1566952450167834e-06, "loss": 0.4929, "step": 12334 }, { "epoch": 1.57, "grad_norm": 0.6856095242421401, "learning_rate": 1.1560354660705713e-06, "loss": 0.4466, "step": 12335 }, { "epoch": 1.57, "grad_norm": 0.6067802333165611, "learning_rate": 1.1553758507496477e-06, "loss": 0.4814, "step": 12336 }, { "epoch": 1.57, "grad_norm": 0.8100015560258925, "learning_rate": 1.1547163990820864e-06, "loss": 0.5359, "step": 12337 }, { "epoch": 1.57, "grad_norm": 0.7805552226254273, "learning_rate": 1.1540571110959625e-06, "loss": 0.5183, "step": 12338 }, { "epoch": 1.57, "grad_norm": 0.6539970273713223, "learning_rate": 1.1533979868193351e-06, "loss": 0.447, "step": 12339 }, { "epoch": 1.57, "grad_norm": 0.7419615513843656, "learning_rate": 1.1527390262802657e-06, "loss": 0.533, "step": 12340 }, { "epoch": 1.57, "grad_norm": 0.9209472946844475, "learning_rate": 1.1520802295068e-06, "loss": 0.5583, "step": 12341 }, { "epoch": 1.57, "grad_norm": 0.7489228714869187, "learning_rate": 1.1514215965269842e-06, "loss": 0.4581, "step": 12342 }, { "epoch": 1.57, "grad_norm": 0.5923234366610121, "learning_rate": 1.1507631273688513e-06, "loss": 0.4512, "step": 12343 }, { "epoch": 1.57, "grad_norm": 0.7155787547729364, "learning_rate": 1.1501048220604321e-06, "loss": 0.5374, "step": 12344 }, { "epoch": 1.57, "grad_norm": 0.5587037437830794, "learning_rate": 1.1494466806297488e-06, "loss": 0.4515, "step": 12345 }, { "epoch": 1.57, "grad_norm": 0.617336076968726, "learning_rate": 1.1487887031048152e-06, "loss": 0.4886, "step": 12346 }, { "epoch": 1.57, "grad_norm": 0.5903288578910946, "learning_rate": 1.1481308895136406e-06, "loss": 0.4502, "step": 12347 }, { "epoch": 1.57, "grad_norm": 0.7161990881643912, "learning_rate": 1.1474732398842237e-06, "loss": 0.4838, "step": 12348 }, { "epoch": 1.57, "grad_norm": 0.7872587160369162, "learning_rate": 1.1468157542445619e-06, "loss": 0.5686, "step": 12349 }, { "epoch": 1.57, "grad_norm": 0.8507366661930896, "learning_rate": 1.1461584326226382e-06, "loss": 0.5241, "step": 12350 }, { "epoch": 1.57, "grad_norm": 0.7254210892424513, "learning_rate": 1.1455012750464373e-06, "loss": 0.4827, "step": 12351 }, { "epoch": 1.57, "grad_norm": 0.7137551030317384, "learning_rate": 1.1448442815439282e-06, "loss": 0.476, "step": 12352 }, { "epoch": 1.57, "grad_norm": 0.5872652499894705, "learning_rate": 1.1441874521430795e-06, "loss": 0.4327, "step": 12353 }, { "epoch": 1.57, "grad_norm": 0.644055406984293, "learning_rate": 1.1435307868718487e-06, "loss": 0.4694, "step": 12354 }, { "epoch": 1.57, "grad_norm": 0.679061565202975, "learning_rate": 1.1428742857581903e-06, "loss": 0.4619, "step": 12355 }, { "epoch": 1.57, "grad_norm": 0.6190420126527062, "learning_rate": 1.1422179488300467e-06, "loss": 0.4925, "step": 12356 }, { "epoch": 1.57, "grad_norm": 0.7131141943543449, "learning_rate": 1.1415617761153568e-06, "loss": 0.4772, "step": 12357 }, { "epoch": 1.57, "grad_norm": 0.6170239259974337, "learning_rate": 1.1409057676420527e-06, "loss": 0.4691, "step": 12358 }, { "epoch": 1.57, "grad_norm": 0.7376708182270733, "learning_rate": 1.1402499234380592e-06, "loss": 0.5457, "step": 12359 }, { "epoch": 1.57, "grad_norm": 0.6971775354552536, "learning_rate": 1.1395942435312913e-06, "loss": 0.4844, "step": 12360 }, { "epoch": 1.57, "grad_norm": 0.583030293137026, "learning_rate": 1.1389387279496616e-06, "loss": 0.4449, "step": 12361 }, { "epoch": 1.57, "grad_norm": 0.586577214204278, "learning_rate": 1.1382833767210705e-06, "loss": 0.4791, "step": 12362 }, { "epoch": 1.58, "grad_norm": 0.8067370399394035, "learning_rate": 1.1376281898734176e-06, "loss": 0.5484, "step": 12363 }, { "epoch": 1.58, "grad_norm": 0.6218580289394675, "learning_rate": 1.1369731674345885e-06, "loss": 0.4655, "step": 12364 }, { "epoch": 1.58, "grad_norm": 1.06061404823353, "learning_rate": 1.1363183094324692e-06, "loss": 0.5058, "step": 12365 }, { "epoch": 1.58, "grad_norm": 0.7075347946731205, "learning_rate": 1.1356636158949308e-06, "loss": 0.4419, "step": 12366 }, { "epoch": 1.58, "grad_norm": 0.637850508586846, "learning_rate": 1.1350090868498454e-06, "loss": 0.4724, "step": 12367 }, { "epoch": 1.58, "grad_norm": 0.7102049823478476, "learning_rate": 1.134354722325071e-06, "loss": 0.4655, "step": 12368 }, { "epoch": 1.58, "grad_norm": 0.5693048529665501, "learning_rate": 1.1337005223484632e-06, "loss": 0.4534, "step": 12369 }, { "epoch": 1.58, "grad_norm": 0.6328283920290507, "learning_rate": 1.133046486947869e-06, "loss": 0.4805, "step": 12370 }, { "epoch": 1.58, "grad_norm": 0.7871583608081807, "learning_rate": 1.1323926161511306e-06, "loss": 0.5071, "step": 12371 }, { "epoch": 1.58, "grad_norm": 0.842354931050318, "learning_rate": 1.1317389099860782e-06, "loss": 0.4805, "step": 12372 }, { "epoch": 1.58, "grad_norm": 0.7383795153736614, "learning_rate": 1.1310853684805401e-06, "loss": 0.5327, "step": 12373 }, { "epoch": 1.58, "grad_norm": 0.6307663155397372, "learning_rate": 1.1304319916623335e-06, "loss": 0.4474, "step": 12374 }, { "epoch": 1.58, "grad_norm": 0.7074221502761343, "learning_rate": 1.129778779559273e-06, "loss": 0.4645, "step": 12375 }, { "epoch": 1.58, "grad_norm": 0.7194388640373099, "learning_rate": 1.1291257321991606e-06, "loss": 0.5487, "step": 12376 }, { "epoch": 1.58, "grad_norm": 0.7346355238851677, "learning_rate": 1.128472849609798e-06, "loss": 0.5274, "step": 12377 }, { "epoch": 1.58, "grad_norm": 0.6325951640704466, "learning_rate": 1.127820131818973e-06, "loss": 0.4833, "step": 12378 }, { "epoch": 1.58, "grad_norm": 0.762622835415484, "learning_rate": 1.1271675788544728e-06, "loss": 0.4562, "step": 12379 }, { "epoch": 1.58, "grad_norm": 0.5613287092953302, "learning_rate": 1.1265151907440718e-06, "loss": 0.4286, "step": 12380 }, { "epoch": 1.58, "grad_norm": 0.5635407528250036, "learning_rate": 1.1258629675155425e-06, "loss": 0.4074, "step": 12381 }, { "epoch": 1.58, "grad_norm": 0.6684364007026579, "learning_rate": 1.1252109091966435e-06, "loss": 0.4821, "step": 12382 }, { "epoch": 1.58, "grad_norm": 0.6494881713856422, "learning_rate": 1.1245590158151376e-06, "loss": 0.4522, "step": 12383 }, { "epoch": 1.58, "grad_norm": 0.647872070288439, "learning_rate": 1.1239072873987683e-06, "loss": 0.4777, "step": 12384 }, { "epoch": 1.58, "grad_norm": 0.6850306673113415, "learning_rate": 1.123255723975281e-06, "loss": 0.5153, "step": 12385 }, { "epoch": 1.58, "grad_norm": 0.593575944668411, "learning_rate": 1.122604325572408e-06, "loss": 0.5046, "step": 12386 }, { "epoch": 1.58, "grad_norm": 0.693534112886714, "learning_rate": 1.1219530922178794e-06, "loss": 0.5476, "step": 12387 }, { "epoch": 1.58, "grad_norm": 0.726839314596082, "learning_rate": 1.121302023939414e-06, "loss": 0.5656, "step": 12388 }, { "epoch": 1.58, "grad_norm": 0.8782950575254773, "learning_rate": 1.1206511207647285e-06, "loss": 0.549, "step": 12389 }, { "epoch": 1.58, "grad_norm": 0.685133939334505, "learning_rate": 1.1200003827215272e-06, "loss": 0.4936, "step": 12390 }, { "epoch": 1.58, "grad_norm": 0.8731837695648105, "learning_rate": 1.1193498098375116e-06, "loss": 0.5382, "step": 12391 }, { "epoch": 1.58, "grad_norm": 0.6073642639319529, "learning_rate": 1.1186994021403723e-06, "loss": 0.4266, "step": 12392 }, { "epoch": 1.58, "grad_norm": 0.6187266550587338, "learning_rate": 1.1180491596577986e-06, "loss": 0.4838, "step": 12393 }, { "epoch": 1.58, "grad_norm": 0.8285100612043065, "learning_rate": 1.1173990824174658e-06, "loss": 0.5172, "step": 12394 }, { "epoch": 1.58, "grad_norm": 0.789166611426693, "learning_rate": 1.1167491704470467e-06, "loss": 0.4983, "step": 12395 }, { "epoch": 1.58, "grad_norm": 0.5716490296813594, "learning_rate": 1.1160994237742062e-06, "loss": 0.4259, "step": 12396 }, { "epoch": 1.58, "grad_norm": 0.7385313563755456, "learning_rate": 1.1154498424266036e-06, "loss": 0.5033, "step": 12397 }, { "epoch": 1.58, "grad_norm": 0.8642960566751557, "learning_rate": 1.1148004264318868e-06, "loss": 0.5159, "step": 12398 }, { "epoch": 1.58, "grad_norm": 0.6672519090241492, "learning_rate": 1.114151175817702e-06, "loss": 0.4651, "step": 12399 }, { "epoch": 1.58, "grad_norm": 0.6473380915981833, "learning_rate": 1.1135020906116823e-06, "loss": 0.4752, "step": 12400 }, { "epoch": 1.58, "grad_norm": 0.8320651416817926, "learning_rate": 1.1128531708414609e-06, "loss": 0.5493, "step": 12401 }, { "epoch": 1.58, "grad_norm": 0.5661735405880884, "learning_rate": 1.1122044165346568e-06, "loss": 0.4437, "step": 12402 }, { "epoch": 1.58, "grad_norm": 0.6973122378599873, "learning_rate": 1.1115558277188888e-06, "loss": 0.5081, "step": 12403 }, { "epoch": 1.58, "grad_norm": 0.6443888357532452, "learning_rate": 1.110907404421762e-06, "loss": 0.4915, "step": 12404 }, { "epoch": 1.58, "grad_norm": 0.5921432303366025, "learning_rate": 1.1102591466708806e-06, "loss": 0.4405, "step": 12405 }, { "epoch": 1.58, "grad_norm": 0.6001108987068897, "learning_rate": 1.1096110544938365e-06, "loss": 0.5006, "step": 12406 }, { "epoch": 1.58, "grad_norm": 0.6557915270373862, "learning_rate": 1.1089631279182172e-06, "loss": 0.5113, "step": 12407 }, { "epoch": 1.58, "grad_norm": 0.6708251991350578, "learning_rate": 1.1083153669716058e-06, "loss": 0.4927, "step": 12408 }, { "epoch": 1.58, "grad_norm": 0.7166415410737362, "learning_rate": 1.1076677716815709e-06, "loss": 0.5524, "step": 12409 }, { "epoch": 1.58, "grad_norm": 0.6543705847246153, "learning_rate": 1.1070203420756824e-06, "loss": 0.528, "step": 12410 }, { "epoch": 1.58, "grad_norm": 0.6225086191883625, "learning_rate": 1.1063730781814968e-06, "loss": 0.4551, "step": 12411 }, { "epoch": 1.58, "grad_norm": 0.7566265393707485, "learning_rate": 1.1057259800265685e-06, "loss": 0.5013, "step": 12412 }, { "epoch": 1.58, "grad_norm": 0.705168217147271, "learning_rate": 1.1050790476384387e-06, "loss": 0.5102, "step": 12413 }, { "epoch": 1.58, "grad_norm": 0.7544410019023697, "learning_rate": 1.1044322810446495e-06, "loss": 0.511, "step": 12414 }, { "epoch": 1.58, "grad_norm": 0.5570423469396895, "learning_rate": 1.1037856802727282e-06, "loss": 0.4293, "step": 12415 }, { "epoch": 1.58, "grad_norm": 0.6025056264587775, "learning_rate": 1.1031392453502016e-06, "loss": 0.5045, "step": 12416 }, { "epoch": 1.58, "grad_norm": 0.7649673681600453, "learning_rate": 1.1024929763045826e-06, "loss": 0.561, "step": 12417 }, { "epoch": 1.58, "grad_norm": 0.7689333940488212, "learning_rate": 1.1018468731633853e-06, "loss": 0.5213, "step": 12418 }, { "epoch": 1.58, "grad_norm": 0.6571862993090566, "learning_rate": 1.1012009359541064e-06, "loss": 0.4023, "step": 12419 }, { "epoch": 1.58, "grad_norm": 0.6224293004309707, "learning_rate": 1.1005551647042478e-06, "loss": 0.4763, "step": 12420 }, { "epoch": 1.58, "grad_norm": 0.9898622675459934, "learning_rate": 1.0999095594412935e-06, "loss": 0.4789, "step": 12421 }, { "epoch": 1.58, "grad_norm": 0.6551993434591546, "learning_rate": 1.0992641201927278e-06, "loss": 0.4368, "step": 12422 }, { "epoch": 1.58, "grad_norm": 0.6074057818095697, "learning_rate": 1.0986188469860227e-06, "loss": 0.4644, "step": 12423 }, { "epoch": 1.58, "grad_norm": 0.7893196522403325, "learning_rate": 1.0979737398486468e-06, "loss": 0.5018, "step": 12424 }, { "epoch": 1.58, "grad_norm": 0.6533582913013297, "learning_rate": 1.0973287988080588e-06, "loss": 0.5156, "step": 12425 }, { "epoch": 1.58, "grad_norm": 0.8024593381200521, "learning_rate": 1.0966840238917137e-06, "loss": 0.5577, "step": 12426 }, { "epoch": 1.58, "grad_norm": 0.6707111545952976, "learning_rate": 1.0960394151270553e-06, "loss": 0.4548, "step": 12427 }, { "epoch": 1.58, "grad_norm": 0.5441301439105837, "learning_rate": 1.0953949725415252e-06, "loss": 0.4113, "step": 12428 }, { "epoch": 1.58, "grad_norm": 0.626874255514243, "learning_rate": 1.0947506961625526e-06, "loss": 0.4364, "step": 12429 }, { "epoch": 1.58, "grad_norm": 0.7434785927183007, "learning_rate": 1.0941065860175647e-06, "loss": 0.5426, "step": 12430 }, { "epoch": 1.58, "grad_norm": 0.8254638982069235, "learning_rate": 1.093462642133975e-06, "loss": 0.5304, "step": 12431 }, { "epoch": 1.58, "grad_norm": 0.596545871989964, "learning_rate": 1.0928188645392002e-06, "loss": 0.4401, "step": 12432 }, { "epoch": 1.58, "grad_norm": 0.6291141277048712, "learning_rate": 1.0921752532606388e-06, "loss": 0.4742, "step": 12433 }, { "epoch": 1.58, "grad_norm": 0.7088870282527755, "learning_rate": 1.0915318083256914e-06, "loss": 0.5412, "step": 12434 }, { "epoch": 1.58, "grad_norm": 0.76312152501484, "learning_rate": 1.0908885297617433e-06, "loss": 0.4869, "step": 12435 }, { "epoch": 1.58, "grad_norm": 0.6603879061152835, "learning_rate": 1.0902454175961802e-06, "loss": 0.4758, "step": 12436 }, { "epoch": 1.58, "grad_norm": 0.7328496481979246, "learning_rate": 1.0896024718563747e-06, "loss": 0.5109, "step": 12437 }, { "epoch": 1.58, "grad_norm": 0.6108229392928778, "learning_rate": 1.0889596925696976e-06, "loss": 0.4329, "step": 12438 }, { "epoch": 1.58, "grad_norm": 0.6747185564731631, "learning_rate": 1.0883170797635069e-06, "loss": 0.442, "step": 12439 }, { "epoch": 1.58, "grad_norm": 0.6025143379484276, "learning_rate": 1.0876746334651595e-06, "loss": 0.4391, "step": 12440 }, { "epoch": 1.58, "grad_norm": 0.7740659226646713, "learning_rate": 1.087032353702e-06, "loss": 0.4792, "step": 12441 }, { "epoch": 1.59, "grad_norm": 0.6023737291497281, "learning_rate": 1.0863902405013699e-06, "loss": 0.4368, "step": 12442 }, { "epoch": 1.59, "grad_norm": 0.8471887543694859, "learning_rate": 1.0857482938906e-06, "loss": 0.5449, "step": 12443 }, { "epoch": 1.59, "grad_norm": 0.8337551824908177, "learning_rate": 1.0851065138970174e-06, "loss": 0.4924, "step": 12444 }, { "epoch": 1.59, "grad_norm": 0.7342428842904125, "learning_rate": 1.0844649005479402e-06, "loss": 0.4545, "step": 12445 }, { "epoch": 1.59, "grad_norm": 0.696458108104308, "learning_rate": 1.083823453870681e-06, "loss": 0.4196, "step": 12446 }, { "epoch": 1.59, "grad_norm": 0.6234353285168818, "learning_rate": 1.0831821738925418e-06, "loss": 0.4924, "step": 12447 }, { "epoch": 1.59, "grad_norm": 0.7668857630135563, "learning_rate": 1.0825410606408227e-06, "loss": 0.544, "step": 12448 }, { "epoch": 1.59, "grad_norm": 0.6863881784559689, "learning_rate": 1.0819001141428103e-06, "loss": 0.4999, "step": 12449 }, { "epoch": 1.59, "grad_norm": 0.734738135824825, "learning_rate": 1.0812593344257916e-06, "loss": 0.4967, "step": 12450 }, { "epoch": 1.59, "grad_norm": 0.7798931091354768, "learning_rate": 1.0806187215170388e-06, "loss": 0.5438, "step": 12451 }, { "epoch": 1.59, "grad_norm": 0.6631066047631436, "learning_rate": 1.0799782754438238e-06, "loss": 0.4881, "step": 12452 }, { "epoch": 1.59, "grad_norm": 0.5854358661791961, "learning_rate": 1.079337996233406e-06, "loss": 0.4095, "step": 12453 }, { "epoch": 1.59, "grad_norm": 0.6746836853476859, "learning_rate": 1.0786978839130418e-06, "loss": 0.4678, "step": 12454 }, { "epoch": 1.59, "grad_norm": 0.7752064056526089, "learning_rate": 1.0780579385099771e-06, "loss": 0.5486, "step": 12455 }, { "epoch": 1.59, "grad_norm": 0.8123653732768136, "learning_rate": 1.0774181600514533e-06, "loss": 0.5137, "step": 12456 }, { "epoch": 1.59, "grad_norm": 0.6205716751194332, "learning_rate": 1.0767785485647047e-06, "loss": 0.4768, "step": 12457 }, { "epoch": 1.59, "grad_norm": 0.8002223470064728, "learning_rate": 1.0761391040769554e-06, "loss": 0.5267, "step": 12458 }, { "epoch": 1.59, "grad_norm": 0.6618194195024893, "learning_rate": 1.0754998266154255e-06, "loss": 0.472, "step": 12459 }, { "epoch": 1.59, "grad_norm": 0.875514070180337, "learning_rate": 1.0748607162073283e-06, "loss": 0.5948, "step": 12460 }, { "epoch": 1.59, "grad_norm": 0.735376010652658, "learning_rate": 1.0742217728798659e-06, "loss": 0.5414, "step": 12461 }, { "epoch": 1.59, "grad_norm": 0.6747537678111155, "learning_rate": 1.0735829966602397e-06, "loss": 0.418, "step": 12462 }, { "epoch": 1.59, "grad_norm": 0.6748445875719747, "learning_rate": 1.0729443875756363e-06, "loss": 0.4923, "step": 12463 }, { "epoch": 1.59, "grad_norm": 0.6734514943968585, "learning_rate": 1.072305945653243e-06, "loss": 0.4758, "step": 12464 }, { "epoch": 1.59, "grad_norm": 0.7553759501116974, "learning_rate": 1.0716676709202328e-06, "loss": 0.4985, "step": 12465 }, { "epoch": 1.59, "grad_norm": 0.6871281694988058, "learning_rate": 1.0710295634037783e-06, "loss": 0.5097, "step": 12466 }, { "epoch": 1.59, "grad_norm": 0.7672754971748228, "learning_rate": 1.0703916231310385e-06, "loss": 0.5095, "step": 12467 }, { "epoch": 1.59, "grad_norm": 0.6438093328673342, "learning_rate": 1.0697538501291704e-06, "loss": 0.4755, "step": 12468 }, { "epoch": 1.59, "grad_norm": 0.6904457429183142, "learning_rate": 1.0691162444253229e-06, "loss": 0.5016, "step": 12469 }, { "epoch": 1.59, "grad_norm": 0.814563406017043, "learning_rate": 1.0684788060466338e-06, "loss": 0.4905, "step": 12470 }, { "epoch": 1.59, "grad_norm": 0.712362294933189, "learning_rate": 1.0678415350202402e-06, "loss": 0.5168, "step": 12471 }, { "epoch": 1.59, "grad_norm": 0.7540432600069792, "learning_rate": 1.067204431373266e-06, "loss": 0.5199, "step": 12472 }, { "epoch": 1.59, "grad_norm": 0.6908112681676623, "learning_rate": 1.0665674951328326e-06, "loss": 0.5641, "step": 12473 }, { "epoch": 1.59, "grad_norm": 0.5724467159258316, "learning_rate": 1.0659307263260504e-06, "loss": 0.4158, "step": 12474 }, { "epoch": 1.59, "grad_norm": 0.6518556371501317, "learning_rate": 1.065294124980027e-06, "loss": 0.4611, "step": 12475 }, { "epoch": 1.59, "grad_norm": 0.7073720041583019, "learning_rate": 1.0646576911218576e-06, "loss": 0.47, "step": 12476 }, { "epoch": 1.59, "grad_norm": 0.6820294358847813, "learning_rate": 1.0640214247786368e-06, "loss": 0.4825, "step": 12477 }, { "epoch": 1.59, "grad_norm": 0.8666876521994353, "learning_rate": 1.0633853259774441e-06, "loss": 0.4793, "step": 12478 }, { "epoch": 1.59, "grad_norm": 0.6197105648977901, "learning_rate": 1.0627493947453604e-06, "loss": 0.4666, "step": 12479 }, { "epoch": 1.59, "grad_norm": 0.7195826597017622, "learning_rate": 1.0621136311094514e-06, "loss": 0.5343, "step": 12480 }, { "epoch": 1.59, "grad_norm": 0.9673020890606007, "learning_rate": 1.0614780350967824e-06, "loss": 0.5906, "step": 12481 }, { "epoch": 1.59, "grad_norm": 0.7240381127425561, "learning_rate": 1.0608426067344068e-06, "loss": 0.5404, "step": 12482 }, { "epoch": 1.59, "grad_norm": 0.8883617278973119, "learning_rate": 1.0602073460493757e-06, "loss": 0.5534, "step": 12483 }, { "epoch": 1.59, "grad_norm": 0.6324573631862704, "learning_rate": 1.0595722530687264e-06, "loss": 0.4633, "step": 12484 }, { "epoch": 1.59, "grad_norm": 0.5705562280287957, "learning_rate": 1.0589373278194958e-06, "loss": 0.4465, "step": 12485 }, { "epoch": 1.59, "grad_norm": 0.6813249887718699, "learning_rate": 1.0583025703287082e-06, "loss": 0.4356, "step": 12486 }, { "epoch": 1.59, "grad_norm": 0.668865898224227, "learning_rate": 1.0576679806233853e-06, "loss": 0.4551, "step": 12487 }, { "epoch": 1.59, "grad_norm": 0.7562990706625373, "learning_rate": 1.0570335587305375e-06, "loss": 0.494, "step": 12488 }, { "epoch": 1.59, "grad_norm": 0.8920849104170614, "learning_rate": 1.0563993046771725e-06, "loss": 0.565, "step": 12489 }, { "epoch": 1.59, "grad_norm": 0.7341095172677672, "learning_rate": 1.0557652184902855e-06, "loss": 0.4848, "step": 12490 }, { "epoch": 1.59, "grad_norm": 0.7506248492920268, "learning_rate": 1.0551313001968711e-06, "loss": 0.4844, "step": 12491 }, { "epoch": 1.59, "grad_norm": 0.9161936135635601, "learning_rate": 1.0544975498239097e-06, "loss": 0.5005, "step": 12492 }, { "epoch": 1.59, "grad_norm": 0.6279738380580488, "learning_rate": 1.0538639673983797e-06, "loss": 0.4203, "step": 12493 }, { "epoch": 1.59, "grad_norm": 0.6551651438939093, "learning_rate": 1.0532305529472503e-06, "loss": 0.4837, "step": 12494 }, { "epoch": 1.59, "grad_norm": 0.8294251575820089, "learning_rate": 1.0525973064974854e-06, "loss": 0.5726, "step": 12495 }, { "epoch": 1.59, "grad_norm": 0.6842571119878293, "learning_rate": 1.0519642280760378e-06, "loss": 0.4823, "step": 12496 }, { "epoch": 1.59, "grad_norm": 0.7768751005088844, "learning_rate": 1.0513313177098583e-06, "loss": 0.5794, "step": 12497 }, { "epoch": 1.59, "grad_norm": 0.7788419935063551, "learning_rate": 1.0506985754258852e-06, "loss": 0.476, "step": 12498 }, { "epoch": 1.59, "grad_norm": 0.6234408645210947, "learning_rate": 1.050066001251055e-06, "loss": 0.4707, "step": 12499 }, { "epoch": 1.59, "grad_norm": 0.7431956443714192, "learning_rate": 1.0494335952122913e-06, "loss": 0.5198, "step": 12500 }, { "epoch": 1.59, "grad_norm": 0.6930526613657506, "learning_rate": 1.0488013573365168e-06, "loss": 0.5076, "step": 12501 }, { "epoch": 1.59, "grad_norm": 0.6049773813762468, "learning_rate": 1.048169287650641e-06, "loss": 0.4745, "step": 12502 }, { "epoch": 1.59, "grad_norm": 0.6942741354684965, "learning_rate": 1.047537386181572e-06, "loss": 0.47, "step": 12503 }, { "epoch": 1.59, "grad_norm": 1.1111118041485089, "learning_rate": 1.0469056529562043e-06, "loss": 0.4717, "step": 12504 }, { "epoch": 1.59, "grad_norm": 1.019133687368023, "learning_rate": 1.0462740880014322e-06, "loss": 0.481, "step": 12505 }, { "epoch": 1.59, "grad_norm": 0.6635400508640572, "learning_rate": 1.0456426913441353e-06, "loss": 0.4374, "step": 12506 }, { "epoch": 1.59, "grad_norm": 0.7823269292425624, "learning_rate": 1.0450114630111958e-06, "loss": 0.4956, "step": 12507 }, { "epoch": 1.59, "grad_norm": 0.6479406727661664, "learning_rate": 1.0443804030294779e-06, "loss": 0.4352, "step": 12508 }, { "epoch": 1.59, "grad_norm": 0.6728843950927008, "learning_rate": 1.0437495114258473e-06, "loss": 0.4426, "step": 12509 }, { "epoch": 1.59, "grad_norm": 0.7882558469351173, "learning_rate": 1.0431187882271564e-06, "loss": 0.5034, "step": 12510 }, { "epoch": 1.59, "grad_norm": 0.7017967725493846, "learning_rate": 1.042488233460256e-06, "loss": 0.4957, "step": 12511 }, { "epoch": 1.59, "grad_norm": 1.040715846810283, "learning_rate": 1.0418578471519836e-06, "loss": 0.5178, "step": 12512 }, { "epoch": 1.59, "grad_norm": 0.7723760679529916, "learning_rate": 1.041227629329175e-06, "loss": 0.5137, "step": 12513 }, { "epoch": 1.59, "grad_norm": 0.573099176834312, "learning_rate": 1.0405975800186552e-06, "loss": 0.4491, "step": 12514 }, { "epoch": 1.59, "grad_norm": 0.6638323616994304, "learning_rate": 1.0399676992472451e-06, "loss": 0.5226, "step": 12515 }, { "epoch": 1.59, "grad_norm": 0.6181841777155935, "learning_rate": 1.039337987041754e-06, "loss": 0.4644, "step": 12516 }, { "epoch": 1.59, "grad_norm": 0.5981400519336075, "learning_rate": 1.0387084434289901e-06, "loss": 0.4756, "step": 12517 }, { "epoch": 1.59, "grad_norm": 0.6387767112853806, "learning_rate": 1.0380790684357478e-06, "loss": 0.4562, "step": 12518 }, { "epoch": 1.59, "grad_norm": 0.9894872507180449, "learning_rate": 1.037449862088819e-06, "loss": 0.5238, "step": 12519 }, { "epoch": 1.6, "grad_norm": 0.68367599696533, "learning_rate": 1.036820824414987e-06, "loss": 0.4518, "step": 12520 }, { "epoch": 1.6, "grad_norm": 0.5735832153213576, "learning_rate": 1.0361919554410294e-06, "loss": 0.3861, "step": 12521 }, { "epoch": 1.6, "grad_norm": 0.7231103208342743, "learning_rate": 1.0355632551937123e-06, "loss": 0.4649, "step": 12522 }, { "epoch": 1.6, "grad_norm": 0.7407415779091522, "learning_rate": 1.0349347236998003e-06, "loss": 0.4864, "step": 12523 }, { "epoch": 1.6, "grad_norm": 0.5957709813216934, "learning_rate": 1.0343063609860454e-06, "loss": 0.4793, "step": 12524 }, { "epoch": 1.6, "grad_norm": 0.7884298922014418, "learning_rate": 1.0336781670791974e-06, "loss": 0.5116, "step": 12525 }, { "epoch": 1.6, "grad_norm": 0.7151594147538668, "learning_rate": 1.0330501420059935e-06, "loss": 0.456, "step": 12526 }, { "epoch": 1.6, "grad_norm": 0.7978677501835424, "learning_rate": 1.0324222857931704e-06, "loss": 0.5653, "step": 12527 }, { "epoch": 1.6, "grad_norm": 0.8004551223839211, "learning_rate": 1.0317945984674505e-06, "loss": 0.5176, "step": 12528 }, { "epoch": 1.6, "grad_norm": 0.611119681071107, "learning_rate": 1.0311670800555556e-06, "loss": 0.446, "step": 12529 }, { "epoch": 1.6, "grad_norm": 0.6413129109052047, "learning_rate": 1.0305397305841936e-06, "loss": 0.4608, "step": 12530 }, { "epoch": 1.6, "grad_norm": 0.8304951248689911, "learning_rate": 1.0299125500800712e-06, "loss": 0.4636, "step": 12531 }, { "epoch": 1.6, "grad_norm": 0.8132496762302249, "learning_rate": 1.0292855385698864e-06, "loss": 0.526, "step": 12532 }, { "epoch": 1.6, "grad_norm": 0.7416048259579014, "learning_rate": 1.0286586960803257e-06, "loss": 0.5373, "step": 12533 }, { "epoch": 1.6, "grad_norm": 0.7860892959713269, "learning_rate": 1.0280320226380753e-06, "loss": 0.5466, "step": 12534 }, { "epoch": 1.6, "grad_norm": 0.7240975207561878, "learning_rate": 1.0274055182698077e-06, "loss": 0.4875, "step": 12535 }, { "epoch": 1.6, "grad_norm": 0.6070105528086054, "learning_rate": 1.0267791830021945e-06, "loss": 0.4806, "step": 12536 }, { "epoch": 1.6, "grad_norm": 0.7583072869659956, "learning_rate": 1.0261530168618927e-06, "loss": 0.5616, "step": 12537 }, { "epoch": 1.6, "grad_norm": 0.8434417259412448, "learning_rate": 1.0255270198755602e-06, "loss": 0.497, "step": 12538 }, { "epoch": 1.6, "grad_norm": 0.5868742684227805, "learning_rate": 1.0249011920698404e-06, "loss": 0.4382, "step": 12539 }, { "epoch": 1.6, "grad_norm": 0.6229371058962254, "learning_rate": 1.0242755334713761e-06, "loss": 0.4493, "step": 12540 }, { "epoch": 1.6, "grad_norm": 0.6732169773582896, "learning_rate": 1.023650044106796e-06, "loss": 0.446, "step": 12541 }, { "epoch": 1.6, "grad_norm": 0.7150737370985929, "learning_rate": 1.023024724002728e-06, "loss": 0.47, "step": 12542 }, { "epoch": 1.6, "grad_norm": 0.652642046876857, "learning_rate": 1.022399573185786e-06, "loss": 0.4453, "step": 12543 }, { "epoch": 1.6, "grad_norm": 0.7200884683972535, "learning_rate": 1.0217745916825872e-06, "loss": 0.5592, "step": 12544 }, { "epoch": 1.6, "grad_norm": 0.7098376144644678, "learning_rate": 1.0211497795197296e-06, "loss": 0.4628, "step": 12545 }, { "epoch": 1.6, "grad_norm": 0.7104851891338493, "learning_rate": 1.0205251367238122e-06, "loss": 0.4669, "step": 12546 }, { "epoch": 1.6, "grad_norm": 0.7633816561549547, "learning_rate": 1.0199006633214225e-06, "loss": 0.501, "step": 12547 }, { "epoch": 1.6, "grad_norm": 0.7145222553235993, "learning_rate": 1.0192763593391442e-06, "loss": 0.4938, "step": 12548 }, { "epoch": 1.6, "grad_norm": 0.7102398779635278, "learning_rate": 1.018652224803549e-06, "loss": 0.5024, "step": 12549 }, { "epoch": 1.6, "grad_norm": 0.7358628567354526, "learning_rate": 1.0180282597412078e-06, "loss": 0.4751, "step": 12550 }, { "epoch": 1.6, "grad_norm": 0.7873542192865367, "learning_rate": 1.0174044641786772e-06, "loss": 0.578, "step": 12551 }, { "epoch": 1.6, "grad_norm": 0.9613680823410571, "learning_rate": 1.0167808381425136e-06, "loss": 0.5563, "step": 12552 }, { "epoch": 1.6, "grad_norm": 0.6616446176371168, "learning_rate": 1.0161573816592601e-06, "loss": 0.474, "step": 12553 }, { "epoch": 1.6, "grad_norm": 0.6402413797474386, "learning_rate": 1.0155340947554571e-06, "loss": 0.4756, "step": 12554 }, { "epoch": 1.6, "grad_norm": 0.5115269316011669, "learning_rate": 1.0149109774576332e-06, "loss": 0.3818, "step": 12555 }, { "epoch": 1.6, "grad_norm": 0.5755264175488575, "learning_rate": 1.0142880297923164e-06, "loss": 0.4445, "step": 12556 }, { "epoch": 1.6, "grad_norm": 0.635856957453188, "learning_rate": 1.0136652517860207e-06, "loss": 0.4709, "step": 12557 }, { "epoch": 1.6, "grad_norm": 0.6713875939656742, "learning_rate": 1.013042643465258e-06, "loss": 0.4747, "step": 12558 }, { "epoch": 1.6, "grad_norm": 0.7266883259125875, "learning_rate": 1.0124202048565285e-06, "loss": 0.5192, "step": 12559 }, { "epoch": 1.6, "grad_norm": 0.6458103286472738, "learning_rate": 1.011797935986329e-06, "loss": 0.4383, "step": 12560 }, { "epoch": 1.6, "grad_norm": 0.6921496632919475, "learning_rate": 1.0111758368811463e-06, "loss": 0.4684, "step": 12561 }, { "epoch": 1.6, "grad_norm": 0.7697995918647232, "learning_rate": 1.0105539075674626e-06, "loss": 0.5514, "step": 12562 }, { "epoch": 1.6, "grad_norm": 0.8131235836751919, "learning_rate": 1.0099321480717494e-06, "loss": 0.522, "step": 12563 }, { "epoch": 1.6, "grad_norm": 0.8114471121040833, "learning_rate": 1.0093105584204754e-06, "loss": 0.4703, "step": 12564 }, { "epoch": 1.6, "grad_norm": 0.719266718799247, "learning_rate": 1.0086891386400977e-06, "loss": 0.4867, "step": 12565 }, { "epoch": 1.6, "grad_norm": 0.7245504679674387, "learning_rate": 1.00806788875707e-06, "loss": 0.4833, "step": 12566 }, { "epoch": 1.6, "grad_norm": 0.6790700150207556, "learning_rate": 1.0074468087978346e-06, "loss": 0.4782, "step": 12567 }, { "epoch": 1.6, "grad_norm": 0.7013486239025825, "learning_rate": 1.0068258987888297e-06, "loss": 0.5374, "step": 12568 }, { "epoch": 1.6, "grad_norm": 0.7021774169072941, "learning_rate": 1.0062051587564865e-06, "loss": 0.5367, "step": 12569 }, { "epoch": 1.6, "grad_norm": 0.60592888829696, "learning_rate": 1.0055845887272286e-06, "loss": 0.4375, "step": 12570 }, { "epoch": 1.6, "grad_norm": 0.5996655145851489, "learning_rate": 1.0049641887274685e-06, "loss": 0.42, "step": 12571 }, { "epoch": 1.6, "grad_norm": 0.5945891584354457, "learning_rate": 1.004343958783619e-06, "loss": 0.4697, "step": 12572 }, { "epoch": 1.6, "grad_norm": 0.6030750082735562, "learning_rate": 1.0037238989220766e-06, "loss": 0.4742, "step": 12573 }, { "epoch": 1.6, "grad_norm": 0.7929404914167424, "learning_rate": 1.0031040091692395e-06, "loss": 0.5676, "step": 12574 }, { "epoch": 1.6, "grad_norm": 0.7334218822926225, "learning_rate": 1.0024842895514909e-06, "loss": 0.5721, "step": 12575 }, { "epoch": 1.6, "grad_norm": 0.7490605478146082, "learning_rate": 1.0018647400952132e-06, "loss": 0.497, "step": 12576 }, { "epoch": 1.6, "grad_norm": 0.7345590647003754, "learning_rate": 1.0012453608267758e-06, "loss": 0.4681, "step": 12577 }, { "epoch": 1.6, "grad_norm": 0.629313297231747, "learning_rate": 1.0006261517725469e-06, "loss": 0.4729, "step": 12578 }, { "epoch": 1.6, "grad_norm": 0.7557193438555481, "learning_rate": 1.0000071129588806e-06, "loss": 0.5145, "step": 12579 }, { "epoch": 1.6, "grad_norm": 0.8650580029186689, "learning_rate": 9.9938824441213e-07, "loss": 0.5888, "step": 12580 }, { "epoch": 1.6, "grad_norm": 0.7125057824068746, "learning_rate": 9.987695461586388e-07, "loss": 0.5241, "step": 12581 }, { "epoch": 1.6, "grad_norm": 0.8843555229183993, "learning_rate": 9.981510182247405e-07, "loss": 0.5105, "step": 12582 }, { "epoch": 1.6, "grad_norm": 0.7196389826081547, "learning_rate": 9.975326606367651e-07, "loss": 0.4556, "step": 12583 }, { "epoch": 1.6, "grad_norm": 0.6075419657957921, "learning_rate": 9.969144734210362e-07, "loss": 0.4558, "step": 12584 }, { "epoch": 1.6, "grad_norm": 0.7012960353947424, "learning_rate": 9.962964566038641e-07, "loss": 0.5055, "step": 12585 }, { "epoch": 1.6, "grad_norm": 0.5537379154700734, "learning_rate": 9.956786102115596e-07, "loss": 0.4366, "step": 12586 }, { "epoch": 1.6, "grad_norm": 0.6957721991896043, "learning_rate": 9.950609342704193e-07, "loss": 0.4901, "step": 12587 }, { "epoch": 1.6, "grad_norm": 0.6103188417937773, "learning_rate": 9.944434288067385e-07, "loss": 0.4814, "step": 12588 }, { "epoch": 1.6, "grad_norm": 0.63963665063704, "learning_rate": 9.938260938467998e-07, "loss": 0.4563, "step": 12589 }, { "epoch": 1.6, "grad_norm": 0.7451573241047665, "learning_rate": 9.932089294168833e-07, "loss": 0.4842, "step": 12590 }, { "epoch": 1.6, "grad_norm": 0.7657755294944113, "learning_rate": 9.925919355432578e-07, "loss": 0.5567, "step": 12591 }, { "epoch": 1.6, "grad_norm": 0.7322349915262246, "learning_rate": 9.919751122521877e-07, "loss": 0.5038, "step": 12592 }, { "epoch": 1.6, "grad_norm": 0.5726571191590609, "learning_rate": 9.913584595699305e-07, "loss": 0.4311, "step": 12593 }, { "epoch": 1.6, "grad_norm": 0.5496720340148381, "learning_rate": 9.907419775227322e-07, "loss": 0.4268, "step": 12594 }, { "epoch": 1.6, "grad_norm": 0.75439512103001, "learning_rate": 9.90125666136838e-07, "loss": 0.5142, "step": 12595 }, { "epoch": 1.6, "grad_norm": 0.781854072399982, "learning_rate": 9.89509525438479e-07, "loss": 0.4732, "step": 12596 }, { "epoch": 1.6, "grad_norm": 0.5825078420934663, "learning_rate": 9.888935554538853e-07, "loss": 0.4411, "step": 12597 }, { "epoch": 1.6, "grad_norm": 0.5702468018504597, "learning_rate": 9.882777562092738e-07, "loss": 0.4139, "step": 12598 }, { "epoch": 1.61, "grad_norm": 0.6393682295395201, "learning_rate": 9.876621277308597e-07, "loss": 0.4612, "step": 12599 }, { "epoch": 1.61, "grad_norm": 0.7193226689255974, "learning_rate": 9.87046670044846e-07, "loss": 0.5015, "step": 12600 }, { "epoch": 1.61, "grad_norm": 0.6681408894700462, "learning_rate": 9.864313831774335e-07, "loss": 0.5361, "step": 12601 }, { "epoch": 1.61, "grad_norm": 0.838166447837878, "learning_rate": 9.858162671548099e-07, "loss": 0.4865, "step": 12602 }, { "epoch": 1.61, "grad_norm": 0.62661545835579, "learning_rate": 9.85201322003162e-07, "loss": 0.4747, "step": 12603 }, { "epoch": 1.61, "grad_norm": 0.6392393760530976, "learning_rate": 9.845865477486626e-07, "loss": 0.5137, "step": 12604 }, { "epoch": 1.61, "grad_norm": 0.6278156698676469, "learning_rate": 9.839719444174827e-07, "loss": 0.5004, "step": 12605 }, { "epoch": 1.61, "grad_norm": 0.6321877961112529, "learning_rate": 9.833575120357842e-07, "loss": 0.4195, "step": 12606 }, { "epoch": 1.61, "grad_norm": 0.625953437783646, "learning_rate": 9.827432506297213e-07, "loss": 0.4219, "step": 12607 }, { "epoch": 1.61, "grad_norm": 0.645071954476846, "learning_rate": 9.821291602254408e-07, "loss": 0.4414, "step": 12608 }, { "epoch": 1.61, "grad_norm": 0.6565137094937934, "learning_rate": 9.81515240849083e-07, "loss": 0.501, "step": 12609 }, { "epoch": 1.61, "grad_norm": 0.7063790621401216, "learning_rate": 9.80901492526779e-07, "loss": 0.5022, "step": 12610 }, { "epoch": 1.61, "grad_norm": 0.8476004465326518, "learning_rate": 9.802879152846573e-07, "loss": 0.4608, "step": 12611 }, { "epoch": 1.61, "grad_norm": 0.9891392630913707, "learning_rate": 9.796745091488318e-07, "loss": 0.4581, "step": 12612 }, { "epoch": 1.61, "grad_norm": 0.630095565344662, "learning_rate": 9.79061274145417e-07, "loss": 0.4232, "step": 12613 }, { "epoch": 1.61, "grad_norm": 0.5890322033636199, "learning_rate": 9.784482103005133e-07, "loss": 0.4003, "step": 12614 }, { "epoch": 1.61, "grad_norm": 0.8056830725401428, "learning_rate": 9.778353176402194e-07, "loss": 0.4819, "step": 12615 }, { "epoch": 1.61, "grad_norm": 0.7750882356618322, "learning_rate": 9.772225961906218e-07, "loss": 0.5428, "step": 12616 }, { "epoch": 1.61, "grad_norm": 0.7254103499805589, "learning_rate": 9.766100459778032e-07, "loss": 0.4933, "step": 12617 }, { "epoch": 1.61, "grad_norm": 0.6154489943698586, "learning_rate": 9.759976670278381e-07, "loss": 0.4038, "step": 12618 }, { "epoch": 1.61, "grad_norm": 0.6668008745219817, "learning_rate": 9.75385459366795e-07, "loss": 0.4364, "step": 12619 }, { "epoch": 1.61, "grad_norm": 0.6664844620373493, "learning_rate": 9.747734230207307e-07, "loss": 0.4476, "step": 12620 }, { "epoch": 1.61, "grad_norm": 0.6645441081929409, "learning_rate": 9.741615580157004e-07, "loss": 0.479, "step": 12621 }, { "epoch": 1.61, "grad_norm": 0.9292482764604143, "learning_rate": 9.735498643777463e-07, "loss": 0.5294, "step": 12622 }, { "epoch": 1.61, "grad_norm": 0.8366933774336927, "learning_rate": 9.729383421329092e-07, "loss": 0.5545, "step": 12623 }, { "epoch": 1.61, "grad_norm": 0.8836727350273745, "learning_rate": 9.723269913072165e-07, "loss": 0.5349, "step": 12624 }, { "epoch": 1.61, "grad_norm": 0.7115701488768033, "learning_rate": 9.717158119266951e-07, "loss": 0.4764, "step": 12625 }, { "epoch": 1.61, "grad_norm": 0.6346044232959566, "learning_rate": 9.711048040173572e-07, "loss": 0.4449, "step": 12626 }, { "epoch": 1.61, "grad_norm": 0.649544984460265, "learning_rate": 9.704939676052149e-07, "loss": 0.4828, "step": 12627 }, { "epoch": 1.61, "grad_norm": 0.673965801981602, "learning_rate": 9.69883302716267e-07, "loss": 0.4381, "step": 12628 }, { "epoch": 1.61, "grad_norm": 0.6801254587989718, "learning_rate": 9.692728093765092e-07, "loss": 0.4946, "step": 12629 }, { "epoch": 1.61, "grad_norm": 0.5767455741945363, "learning_rate": 9.686624876119249e-07, "loss": 0.4536, "step": 12630 }, { "epoch": 1.61, "grad_norm": 0.6808295484056831, "learning_rate": 9.68052337448499e-07, "loss": 0.437, "step": 12631 }, { "epoch": 1.61, "grad_norm": 0.6125693779148296, "learning_rate": 9.674423589121996e-07, "loss": 0.458, "step": 12632 }, { "epoch": 1.61, "grad_norm": 0.6819656507917435, "learning_rate": 9.668325520289939e-07, "loss": 0.533, "step": 12633 }, { "epoch": 1.61, "grad_norm": 0.6210992440087747, "learning_rate": 9.662229168248372e-07, "loss": 0.4805, "step": 12634 }, { "epoch": 1.61, "grad_norm": 0.6845881848214604, "learning_rate": 9.656134533256817e-07, "loss": 0.5505, "step": 12635 }, { "epoch": 1.61, "grad_norm": 1.0200433458933695, "learning_rate": 9.650041615574684e-07, "loss": 0.5142, "step": 12636 }, { "epoch": 1.61, "grad_norm": 0.6248885824907352, "learning_rate": 9.643950415461357e-07, "loss": 0.4604, "step": 12637 }, { "epoch": 1.61, "grad_norm": 0.7233659199381125, "learning_rate": 9.637860933176086e-07, "loss": 0.5068, "step": 12638 }, { "epoch": 1.61, "grad_norm": 0.7122403730355228, "learning_rate": 9.631773168978103e-07, "loss": 0.536, "step": 12639 }, { "epoch": 1.61, "grad_norm": 0.7723807128114341, "learning_rate": 9.62568712312653e-07, "loss": 0.5028, "step": 12640 }, { "epoch": 1.61, "grad_norm": 0.6558048209762508, "learning_rate": 9.61960279588045e-07, "loss": 0.4836, "step": 12641 }, { "epoch": 1.61, "grad_norm": 0.796413898795855, "learning_rate": 9.613520187498832e-07, "loss": 0.5753, "step": 12642 }, { "epoch": 1.61, "grad_norm": 0.8078530587598295, "learning_rate": 9.607439298240601e-07, "loss": 0.5356, "step": 12643 }, { "epoch": 1.61, "grad_norm": 0.7712711889929055, "learning_rate": 9.6013601283646e-07, "loss": 0.526, "step": 12644 }, { "epoch": 1.61, "grad_norm": 0.6327055041495818, "learning_rate": 9.59528267812962e-07, "loss": 0.5156, "step": 12645 }, { "epoch": 1.61, "grad_norm": 0.7978534234836491, "learning_rate": 9.589206947794327e-07, "loss": 0.4883, "step": 12646 }, { "epoch": 1.61, "grad_norm": 0.6791982091283041, "learning_rate": 9.583132937617368e-07, "loss": 0.4775, "step": 12647 }, { "epoch": 1.61, "grad_norm": 0.6896816226112397, "learning_rate": 9.577060647857278e-07, "loss": 0.4678, "step": 12648 }, { "epoch": 1.61, "grad_norm": 0.6183015414647173, "learning_rate": 9.570990078772557e-07, "loss": 0.4506, "step": 12649 }, { "epoch": 1.61, "grad_norm": 0.6149033097854142, "learning_rate": 9.564921230621582e-07, "loss": 0.4558, "step": 12650 }, { "epoch": 1.61, "grad_norm": 0.752653149926404, "learning_rate": 9.558854103662706e-07, "loss": 0.5051, "step": 12651 }, { "epoch": 1.61, "grad_norm": 0.8326903170943053, "learning_rate": 9.552788698154175e-07, "loss": 0.5504, "step": 12652 }, { "epoch": 1.61, "grad_norm": 0.7264910852158718, "learning_rate": 9.546725014354186e-07, "loss": 0.5199, "step": 12653 }, { "epoch": 1.61, "grad_norm": 0.6390916768962019, "learning_rate": 9.540663052520827e-07, "loss": 0.455, "step": 12654 }, { "epoch": 1.61, "grad_norm": 0.814450921194002, "learning_rate": 9.534602812912158e-07, "loss": 0.5469, "step": 12655 }, { "epoch": 1.61, "grad_norm": 0.7691774401238303, "learning_rate": 9.528544295786152e-07, "loss": 0.5682, "step": 12656 }, { "epoch": 1.61, "grad_norm": 0.629262689328145, "learning_rate": 9.522487501400668e-07, "loss": 0.448, "step": 12657 }, { "epoch": 1.61, "grad_norm": 0.7951779755539609, "learning_rate": 9.516432430013562e-07, "loss": 0.5422, "step": 12658 }, { "epoch": 1.61, "grad_norm": 0.6951516762220433, "learning_rate": 9.510379081882543e-07, "loss": 0.4688, "step": 12659 }, { "epoch": 1.61, "grad_norm": 0.7028317043163737, "learning_rate": 9.504327457265316e-07, "loss": 0.4818, "step": 12660 }, { "epoch": 1.61, "grad_norm": 0.5722111122746936, "learning_rate": 9.49827755641945e-07, "loss": 0.4549, "step": 12661 }, { "epoch": 1.61, "grad_norm": 0.6200897722657969, "learning_rate": 9.492229379602496e-07, "loss": 0.4401, "step": 12662 }, { "epoch": 1.61, "grad_norm": 0.6183012471597482, "learning_rate": 9.48618292707188e-07, "loss": 0.4321, "step": 12663 }, { "epoch": 1.61, "grad_norm": 0.6211613257006006, "learning_rate": 9.48013819908501e-07, "loss": 0.5013, "step": 12664 }, { "epoch": 1.61, "grad_norm": 0.8003226485566894, "learning_rate": 9.474095195899158e-07, "loss": 0.5769, "step": 12665 }, { "epoch": 1.61, "grad_norm": 0.7048803700909131, "learning_rate": 9.46805391777158e-07, "loss": 0.4744, "step": 12666 }, { "epoch": 1.61, "grad_norm": 0.6750255339336588, "learning_rate": 9.462014364959404e-07, "loss": 0.4253, "step": 12667 }, { "epoch": 1.61, "grad_norm": 0.6485032781239637, "learning_rate": 9.455976537719764e-07, "loss": 0.4823, "step": 12668 }, { "epoch": 1.61, "grad_norm": 0.6678576656586338, "learning_rate": 9.449940436309624e-07, "loss": 0.5173, "step": 12669 }, { "epoch": 1.61, "grad_norm": 0.8569556427473463, "learning_rate": 9.443906060985952e-07, "loss": 0.4706, "step": 12670 }, { "epoch": 1.61, "grad_norm": 0.5747335443252537, "learning_rate": 9.437873412005594e-07, "loss": 0.4716, "step": 12671 }, { "epoch": 1.61, "grad_norm": 0.830448698406128, "learning_rate": 9.431842489625354e-07, "loss": 0.5054, "step": 12672 }, { "epoch": 1.61, "grad_norm": 0.7360361321025546, "learning_rate": 9.425813294101932e-07, "loss": 0.4861, "step": 12673 }, { "epoch": 1.61, "grad_norm": 0.7624892031193568, "learning_rate": 9.419785825691996e-07, "loss": 0.5407, "step": 12674 }, { "epoch": 1.61, "grad_norm": 0.7365409589930983, "learning_rate": 9.413760084652085e-07, "loss": 0.4745, "step": 12675 }, { "epoch": 1.61, "grad_norm": 0.6834851992186918, "learning_rate": 9.407736071238727e-07, "loss": 0.4977, "step": 12676 }, { "epoch": 1.62, "grad_norm": 1.1052161081638388, "learning_rate": 9.401713785708317e-07, "loss": 0.5196, "step": 12677 }, { "epoch": 1.62, "grad_norm": 0.9032835260412025, "learning_rate": 9.395693228317227e-07, "loss": 0.4612, "step": 12678 }, { "epoch": 1.62, "grad_norm": 0.7458034994746388, "learning_rate": 9.389674399321702e-07, "loss": 0.5204, "step": 12679 }, { "epoch": 1.62, "grad_norm": 0.6943693145475459, "learning_rate": 9.383657298977994e-07, "loss": 0.5082, "step": 12680 }, { "epoch": 1.62, "grad_norm": 0.6387525733653592, "learning_rate": 9.377641927542186e-07, "loss": 0.4769, "step": 12681 }, { "epoch": 1.62, "grad_norm": 0.7940566776193554, "learning_rate": 9.371628285270362e-07, "loss": 0.5054, "step": 12682 }, { "epoch": 1.62, "grad_norm": 0.9023532777659519, "learning_rate": 9.365616372418485e-07, "loss": 0.5015, "step": 12683 }, { "epoch": 1.62, "grad_norm": 0.7059657780773344, "learning_rate": 9.359606189242476e-07, "loss": 0.5319, "step": 12684 }, { "epoch": 1.62, "grad_norm": 0.7191871823887297, "learning_rate": 9.353597735998154e-07, "loss": 0.4746, "step": 12685 }, { "epoch": 1.62, "grad_norm": 0.6151946654806856, "learning_rate": 9.347591012941304e-07, "loss": 0.4784, "step": 12686 }, { "epoch": 1.62, "grad_norm": 0.5865936901501279, "learning_rate": 9.341586020327586e-07, "loss": 0.4741, "step": 12687 }, { "epoch": 1.62, "grad_norm": 0.6142389174852357, "learning_rate": 9.335582758412632e-07, "loss": 0.4456, "step": 12688 }, { "epoch": 1.62, "grad_norm": 0.6094047846428327, "learning_rate": 9.329581227451967e-07, "loss": 0.489, "step": 12689 }, { "epoch": 1.62, "grad_norm": 0.7535306156182966, "learning_rate": 9.323581427701078e-07, "loss": 0.4711, "step": 12690 }, { "epoch": 1.62, "grad_norm": 0.7790503963323167, "learning_rate": 9.317583359415333e-07, "loss": 0.531, "step": 12691 }, { "epoch": 1.62, "grad_norm": 0.8249095680447093, "learning_rate": 9.311587022850055e-07, "loss": 0.5349, "step": 12692 }, { "epoch": 1.62, "grad_norm": 0.8252964905222846, "learning_rate": 9.3055924182605e-07, "loss": 0.4892, "step": 12693 }, { "epoch": 1.62, "grad_norm": 0.5580013566411559, "learning_rate": 9.299599545901849e-07, "loss": 0.4276, "step": 12694 }, { "epoch": 1.62, "grad_norm": 0.5784883705520799, "learning_rate": 9.293608406029175e-07, "loss": 0.4918, "step": 12695 }, { "epoch": 1.62, "grad_norm": 0.7531822200860168, "learning_rate": 9.287618998897519e-07, "loss": 0.5469, "step": 12696 }, { "epoch": 1.62, "grad_norm": 0.7789178431575126, "learning_rate": 9.281631324761814e-07, "loss": 0.5711, "step": 12697 }, { "epoch": 1.62, "grad_norm": 0.6907601023454941, "learning_rate": 9.275645383876963e-07, "loss": 0.4991, "step": 12698 }, { "epoch": 1.62, "grad_norm": 1.0633496443478008, "learning_rate": 9.269661176497735e-07, "loss": 0.5271, "step": 12699 }, { "epoch": 1.62, "grad_norm": 0.7738428287870204, "learning_rate": 9.263678702878887e-07, "loss": 0.503, "step": 12700 }, { "epoch": 1.62, "grad_norm": 0.5759126527115064, "learning_rate": 9.257697963275048e-07, "loss": 0.4406, "step": 12701 }, { "epoch": 1.62, "grad_norm": 0.6487058889302549, "learning_rate": 9.251718957940831e-07, "loss": 0.4885, "step": 12702 }, { "epoch": 1.62, "grad_norm": 0.6688056795119248, "learning_rate": 9.245741687130705e-07, "loss": 0.4647, "step": 12703 }, { "epoch": 1.62, "grad_norm": 0.8804149092808448, "learning_rate": 9.239766151099127e-07, "loss": 0.5375, "step": 12704 }, { "epoch": 1.62, "grad_norm": 0.7929627775306904, "learning_rate": 9.233792350100467e-07, "loss": 0.5011, "step": 12705 }, { "epoch": 1.62, "grad_norm": 0.5940433606705414, "learning_rate": 9.227820284388983e-07, "loss": 0.4289, "step": 12706 }, { "epoch": 1.62, "grad_norm": 0.6546506392619874, "learning_rate": 9.221849954218903e-07, "loss": 0.445, "step": 12707 }, { "epoch": 1.62, "grad_norm": 0.6880461065716861, "learning_rate": 9.21588135984437e-07, "loss": 0.5056, "step": 12708 }, { "epoch": 1.62, "grad_norm": 0.6784026842471029, "learning_rate": 9.20991450151943e-07, "loss": 0.4991, "step": 12709 }, { "epoch": 1.62, "grad_norm": 0.6423937719032692, "learning_rate": 9.203949379498095e-07, "loss": 0.4256, "step": 12710 }, { "epoch": 1.62, "grad_norm": 0.6130630577708838, "learning_rate": 9.197985994034259e-07, "loss": 0.4851, "step": 12711 }, { "epoch": 1.62, "grad_norm": 0.8181148934997431, "learning_rate": 9.192024345381783e-07, "loss": 0.4681, "step": 12712 }, { "epoch": 1.62, "grad_norm": 0.5716683155030157, "learning_rate": 9.186064433794417e-07, "loss": 0.4067, "step": 12713 }, { "epoch": 1.62, "grad_norm": 0.7511205623752096, "learning_rate": 9.180106259525878e-07, "loss": 0.5304, "step": 12714 }, { "epoch": 1.62, "grad_norm": 0.8568192351441365, "learning_rate": 9.174149822829758e-07, "loss": 0.5954, "step": 12715 }, { "epoch": 1.62, "grad_norm": 0.6857590762498632, "learning_rate": 9.168195123959622e-07, "loss": 0.4667, "step": 12716 }, { "epoch": 1.62, "grad_norm": 0.6500258498059417, "learning_rate": 9.162242163168955e-07, "loss": 0.4374, "step": 12717 }, { "epoch": 1.62, "grad_norm": 0.6673717841476332, "learning_rate": 9.156290940711127e-07, "loss": 0.4673, "step": 12718 }, { "epoch": 1.62, "grad_norm": 0.7833869063220057, "learning_rate": 9.150341456839484e-07, "loss": 0.474, "step": 12719 }, { "epoch": 1.62, "grad_norm": 0.6423811493741647, "learning_rate": 9.144393711807259e-07, "loss": 0.4634, "step": 12720 }, { "epoch": 1.62, "grad_norm": 0.7575017470260669, "learning_rate": 9.138447705867648e-07, "loss": 0.5257, "step": 12721 }, { "epoch": 1.62, "grad_norm": 0.7109759820753518, "learning_rate": 9.132503439273733e-07, "loss": 0.4556, "step": 12722 }, { "epoch": 1.62, "grad_norm": 0.5622109452539684, "learning_rate": 9.126560912278564e-07, "loss": 0.4525, "step": 12723 }, { "epoch": 1.62, "grad_norm": 0.6746657062705477, "learning_rate": 9.120620125135071e-07, "loss": 0.5025, "step": 12724 }, { "epoch": 1.62, "grad_norm": 0.7483351058939582, "learning_rate": 9.114681078096165e-07, "loss": 0.5025, "step": 12725 }, { "epoch": 1.62, "grad_norm": 0.573516340700819, "learning_rate": 9.108743771414619e-07, "loss": 0.4828, "step": 12726 }, { "epoch": 1.62, "grad_norm": 0.7345849932316485, "learning_rate": 9.102808205343195e-07, "loss": 0.506, "step": 12727 }, { "epoch": 1.62, "grad_norm": 0.6397350218713659, "learning_rate": 9.096874380134518e-07, "loss": 0.5068, "step": 12728 }, { "epoch": 1.62, "grad_norm": 0.8112785417182795, "learning_rate": 9.090942296041194e-07, "loss": 0.5581, "step": 12729 }, { "epoch": 1.62, "grad_norm": 0.972647726157401, "learning_rate": 9.085011953315731e-07, "loss": 0.5408, "step": 12730 }, { "epoch": 1.62, "grad_norm": 0.7387658148952271, "learning_rate": 9.079083352210577e-07, "loss": 0.4965, "step": 12731 }, { "epoch": 1.62, "grad_norm": 0.5883788346032818, "learning_rate": 9.073156492978069e-07, "loss": 0.4347, "step": 12732 }, { "epoch": 1.62, "grad_norm": 0.7672349601291487, "learning_rate": 9.067231375870516e-07, "loss": 0.4443, "step": 12733 }, { "epoch": 1.62, "grad_norm": 0.6529750539549563, "learning_rate": 9.061308001140107e-07, "loss": 0.4594, "step": 12734 }, { "epoch": 1.62, "grad_norm": 0.6110346856390666, "learning_rate": 9.05538636903901e-07, "loss": 0.472, "step": 12735 }, { "epoch": 1.62, "grad_norm": 0.7602915301747316, "learning_rate": 9.049466479819264e-07, "loss": 0.4754, "step": 12736 }, { "epoch": 1.62, "grad_norm": 0.5494018268888615, "learning_rate": 9.043548333732877e-07, "loss": 0.4464, "step": 12737 }, { "epoch": 1.62, "grad_norm": 0.6975457301208036, "learning_rate": 9.037631931031754e-07, "loss": 0.4396, "step": 12738 }, { "epoch": 1.62, "grad_norm": 0.6513965593486007, "learning_rate": 9.031717271967749e-07, "loss": 0.5166, "step": 12739 }, { "epoch": 1.62, "grad_norm": 0.6867263995988216, "learning_rate": 9.025804356792617e-07, "loss": 0.4643, "step": 12740 }, { "epoch": 1.62, "grad_norm": 0.7497162324947245, "learning_rate": 9.019893185758055e-07, "loss": 0.5289, "step": 12741 }, { "epoch": 1.62, "grad_norm": 0.6285115775032964, "learning_rate": 9.013983759115685e-07, "loss": 0.4827, "step": 12742 }, { "epoch": 1.62, "grad_norm": 0.8905333061248007, "learning_rate": 9.008076077117072e-07, "loss": 0.4439, "step": 12743 }, { "epoch": 1.62, "grad_norm": 0.6005950954504269, "learning_rate": 9.00217014001365e-07, "loss": 0.444, "step": 12744 }, { "epoch": 1.62, "grad_norm": 0.7275173774425826, "learning_rate": 8.996265948056854e-07, "loss": 0.4523, "step": 12745 }, { "epoch": 1.62, "grad_norm": 0.8091986677248169, "learning_rate": 8.990363501497967e-07, "loss": 0.5647, "step": 12746 }, { "epoch": 1.62, "grad_norm": 1.1817351407355732, "learning_rate": 8.984462800588278e-07, "loss": 0.5335, "step": 12747 }, { "epoch": 1.62, "grad_norm": 0.7925695297589522, "learning_rate": 8.978563845578925e-07, "loss": 0.5284, "step": 12748 }, { "epoch": 1.62, "grad_norm": 0.72816580165453, "learning_rate": 8.972666636721033e-07, "loss": 0.4992, "step": 12749 }, { "epoch": 1.62, "grad_norm": 0.649935827741824, "learning_rate": 8.96677117426561e-07, "loss": 0.4486, "step": 12750 }, { "epoch": 1.62, "grad_norm": 0.6641382343788996, "learning_rate": 8.960877458463624e-07, "loss": 0.4726, "step": 12751 }, { "epoch": 1.62, "grad_norm": 0.815631349018808, "learning_rate": 8.954985489565931e-07, "loss": 0.5424, "step": 12752 }, { "epoch": 1.62, "grad_norm": 0.8891065151492951, "learning_rate": 8.949095267823354e-07, "loss": 0.4799, "step": 12753 }, { "epoch": 1.62, "grad_norm": 0.6773066469913687, "learning_rate": 8.943206793486592e-07, "loss": 0.487, "step": 12754 }, { "epoch": 1.62, "grad_norm": 0.682737490678981, "learning_rate": 8.937320066806337e-07, "loss": 0.4819, "step": 12755 }, { "epoch": 1.63, "grad_norm": 0.6960917644130543, "learning_rate": 8.931435088033141e-07, "loss": 0.4926, "step": 12756 }, { "epoch": 1.63, "grad_norm": 0.585748870886106, "learning_rate": 8.92555185741753e-07, "loss": 0.4825, "step": 12757 }, { "epoch": 1.63, "grad_norm": 0.7867794041003218, "learning_rate": 8.919670375209904e-07, "loss": 0.5404, "step": 12758 }, { "epoch": 1.63, "grad_norm": 0.932768841275756, "learning_rate": 8.913790641660653e-07, "loss": 0.508, "step": 12759 }, { "epoch": 1.63, "grad_norm": 0.830431343360483, "learning_rate": 8.907912657020029e-07, "loss": 0.5972, "step": 12760 }, { "epoch": 1.63, "grad_norm": 0.7561375304558007, "learning_rate": 8.902036421538263e-07, "loss": 0.5098, "step": 12761 }, { "epoch": 1.63, "grad_norm": 0.6320969299083288, "learning_rate": 8.896161935465464e-07, "loss": 0.4456, "step": 12762 }, { "epoch": 1.63, "grad_norm": 0.6945163730556853, "learning_rate": 8.890289199051716e-07, "loss": 0.513, "step": 12763 }, { "epoch": 1.63, "grad_norm": 0.8399170420812819, "learning_rate": 8.884418212546974e-07, "loss": 0.5102, "step": 12764 }, { "epoch": 1.63, "grad_norm": 0.8766115518683961, "learning_rate": 8.878548976201179e-07, "loss": 0.4987, "step": 12765 }, { "epoch": 1.63, "grad_norm": 0.7449738451393177, "learning_rate": 8.872681490264129e-07, "loss": 0.5072, "step": 12766 }, { "epoch": 1.63, "grad_norm": 0.6576821696355993, "learning_rate": 8.866815754985608e-07, "loss": 0.3808, "step": 12767 }, { "epoch": 1.63, "grad_norm": 0.6802813448952577, "learning_rate": 8.8609517706153e-07, "loss": 0.4302, "step": 12768 }, { "epoch": 1.63, "grad_norm": 0.729907214896122, "learning_rate": 8.855089537402822e-07, "loss": 0.4942, "step": 12769 }, { "epoch": 1.63, "grad_norm": 0.7209378188491543, "learning_rate": 8.849229055597691e-07, "loss": 0.5223, "step": 12770 }, { "epoch": 1.63, "grad_norm": 0.7363100372747866, "learning_rate": 8.843370325449391e-07, "loss": 0.481, "step": 12771 }, { "epoch": 1.63, "grad_norm": 0.5769602846047718, "learning_rate": 8.837513347207289e-07, "loss": 0.4676, "step": 12772 }, { "epoch": 1.63, "grad_norm": 1.3339742653907016, "learning_rate": 8.831658121120717e-07, "loss": 0.4967, "step": 12773 }, { "epoch": 1.63, "grad_norm": 0.6953893854071048, "learning_rate": 8.825804647438896e-07, "loss": 0.4616, "step": 12774 }, { "epoch": 1.63, "grad_norm": 0.699801531319734, "learning_rate": 8.819952926411001e-07, "loss": 0.4462, "step": 12775 }, { "epoch": 1.63, "grad_norm": 0.8871806640840008, "learning_rate": 8.814102958286114e-07, "loss": 0.5727, "step": 12776 }, { "epoch": 1.63, "grad_norm": 1.0679592843427608, "learning_rate": 8.80825474331326e-07, "loss": 0.579, "step": 12777 }, { "epoch": 1.63, "grad_norm": 0.7386159956116747, "learning_rate": 8.802408281741359e-07, "loss": 0.4653, "step": 12778 }, { "epoch": 1.63, "grad_norm": 0.6102114519117608, "learning_rate": 8.796563573819295e-07, "loss": 0.5151, "step": 12779 }, { "epoch": 1.63, "grad_norm": 0.7405987430936621, "learning_rate": 8.790720619795856e-07, "loss": 0.5048, "step": 12780 }, { "epoch": 1.63, "grad_norm": 0.6733450211364328, "learning_rate": 8.784879419919745e-07, "loss": 0.4799, "step": 12781 }, { "epoch": 1.63, "grad_norm": 0.6259096074078409, "learning_rate": 8.779039974439629e-07, "loss": 0.5323, "step": 12782 }, { "epoch": 1.63, "grad_norm": 0.7482671630912542, "learning_rate": 8.773202283604038e-07, "loss": 0.5542, "step": 12783 }, { "epoch": 1.63, "grad_norm": 0.7543470314722142, "learning_rate": 8.767366347661504e-07, "loss": 0.5424, "step": 12784 }, { "epoch": 1.63, "grad_norm": 0.7567867611786017, "learning_rate": 8.761532166860403e-07, "loss": 0.5481, "step": 12785 }, { "epoch": 1.63, "grad_norm": 1.2980893434933984, "learning_rate": 8.755699741449119e-07, "loss": 0.5114, "step": 12786 }, { "epoch": 1.63, "grad_norm": 0.6433162342033926, "learning_rate": 8.749869071675881e-07, "loss": 0.4419, "step": 12787 }, { "epoch": 1.63, "grad_norm": 0.5498069487354886, "learning_rate": 8.744040157788914e-07, "loss": 0.4397, "step": 12788 }, { "epoch": 1.63, "grad_norm": 0.619208547339296, "learning_rate": 8.738213000036311e-07, "loss": 0.4984, "step": 12789 }, { "epoch": 1.63, "grad_norm": 0.7877010838817367, "learning_rate": 8.732387598666142e-07, "loss": 0.5196, "step": 12790 }, { "epoch": 1.63, "grad_norm": 0.5701985222040307, "learning_rate": 8.726563953926332e-07, "loss": 0.4574, "step": 12791 }, { "epoch": 1.63, "grad_norm": 0.6291738925607676, "learning_rate": 8.720742066064825e-07, "loss": 0.4436, "step": 12792 }, { "epoch": 1.63, "grad_norm": 0.6870106580891933, "learning_rate": 8.714921935329413e-07, "loss": 0.5461, "step": 12793 }, { "epoch": 1.63, "grad_norm": 0.7127867889993103, "learning_rate": 8.709103561967852e-07, "loss": 0.5548, "step": 12794 }, { "epoch": 1.63, "grad_norm": 0.6823130352929512, "learning_rate": 8.703286946227801e-07, "loss": 0.4768, "step": 12795 }, { "epoch": 1.63, "grad_norm": 0.6615411855789585, "learning_rate": 8.697472088356868e-07, "loss": 0.4756, "step": 12796 }, { "epoch": 1.63, "grad_norm": 0.8126633273557131, "learning_rate": 8.691658988602548e-07, "loss": 0.5376, "step": 12797 }, { "epoch": 1.63, "grad_norm": 0.7531866453860803, "learning_rate": 8.68584764721232e-07, "loss": 0.535, "step": 12798 }, { "epoch": 1.63, "grad_norm": 0.710087295692156, "learning_rate": 8.680038064433521e-07, "loss": 0.4631, "step": 12799 }, { "epoch": 1.63, "grad_norm": 0.6952683810176823, "learning_rate": 8.674230240513471e-07, "loss": 0.495, "step": 12800 }, { "epoch": 1.63, "grad_norm": 0.7622844956920236, "learning_rate": 8.668424175699375e-07, "loss": 0.5292, "step": 12801 }, { "epoch": 1.63, "grad_norm": 0.6426268227313442, "learning_rate": 8.662619870238393e-07, "loss": 0.434, "step": 12802 }, { "epoch": 1.63, "grad_norm": 0.6461793773613524, "learning_rate": 8.65681732437757e-07, "loss": 0.4355, "step": 12803 }, { "epoch": 1.63, "grad_norm": 0.5955404102721203, "learning_rate": 8.651016538363937e-07, "loss": 0.4497, "step": 12804 }, { "epoch": 1.63, "grad_norm": 0.5442415018733776, "learning_rate": 8.645217512444387e-07, "loss": 0.4218, "step": 12805 }, { "epoch": 1.63, "grad_norm": 0.6549210783914754, "learning_rate": 8.639420246865793e-07, "loss": 0.4438, "step": 12806 }, { "epoch": 1.63, "grad_norm": 0.5928751738492465, "learning_rate": 8.63362474187489e-07, "loss": 0.4888, "step": 12807 }, { "epoch": 1.63, "grad_norm": 0.7446424627107152, "learning_rate": 8.627830997718411e-07, "loss": 0.5073, "step": 12808 }, { "epoch": 1.63, "grad_norm": 0.6591825988455244, "learning_rate": 8.622039014642947e-07, "loss": 0.4839, "step": 12809 }, { "epoch": 1.63, "grad_norm": 0.7743227084613311, "learning_rate": 8.616248792895072e-07, "loss": 0.5504, "step": 12810 }, { "epoch": 1.63, "grad_norm": 0.9221197984827924, "learning_rate": 8.610460332721227e-07, "loss": 0.5822, "step": 12811 }, { "epoch": 1.63, "grad_norm": 0.9379069178797567, "learning_rate": 8.604673634367833e-07, "loss": 0.4921, "step": 12812 }, { "epoch": 1.63, "grad_norm": 0.7134652191135612, "learning_rate": 8.598888698081193e-07, "loss": 0.5137, "step": 12813 }, { "epoch": 1.63, "grad_norm": 0.7943533116678968, "learning_rate": 8.593105524107575e-07, "loss": 0.5522, "step": 12814 }, { "epoch": 1.63, "grad_norm": 0.6742189118693355, "learning_rate": 8.587324112693124e-07, "loss": 0.4914, "step": 12815 }, { "epoch": 1.63, "grad_norm": 0.5679993695690433, "learning_rate": 8.581544464083946e-07, "loss": 0.469, "step": 12816 }, { "epoch": 1.63, "grad_norm": 0.693228624118625, "learning_rate": 8.575766578526068e-07, "loss": 0.5208, "step": 12817 }, { "epoch": 1.63, "grad_norm": 0.6523118977057253, "learning_rate": 8.569990456265448e-07, "loss": 0.4254, "step": 12818 }, { "epoch": 1.63, "grad_norm": 0.6774668729716028, "learning_rate": 8.564216097547934e-07, "loss": 0.488, "step": 12819 }, { "epoch": 1.63, "grad_norm": 0.642061469978005, "learning_rate": 8.558443502619335e-07, "loss": 0.4797, "step": 12820 }, { "epoch": 1.63, "grad_norm": 0.5761993852316213, "learning_rate": 8.552672671725359e-07, "loss": 0.4352, "step": 12821 }, { "epoch": 1.63, "grad_norm": 0.6014475715570641, "learning_rate": 8.546903605111678e-07, "loss": 0.4544, "step": 12822 }, { "epoch": 1.63, "grad_norm": 0.69047254476014, "learning_rate": 8.541136303023828e-07, "loss": 0.4654, "step": 12823 }, { "epoch": 1.63, "grad_norm": 0.7280801167596546, "learning_rate": 8.535370765707334e-07, "loss": 0.4932, "step": 12824 }, { "epoch": 1.63, "grad_norm": 0.8182998422516966, "learning_rate": 8.529606993407591e-07, "loss": 0.4931, "step": 12825 }, { "epoch": 1.63, "grad_norm": 0.6972969678185281, "learning_rate": 8.523844986369972e-07, "loss": 0.3868, "step": 12826 }, { "epoch": 1.63, "grad_norm": 0.6850838938784622, "learning_rate": 8.518084744839722e-07, "loss": 0.4554, "step": 12827 }, { "epoch": 1.63, "grad_norm": 0.7173971042788944, "learning_rate": 8.512326269062048e-07, "loss": 0.4915, "step": 12828 }, { "epoch": 1.63, "grad_norm": 0.6866272612304782, "learning_rate": 8.506569559282069e-07, "loss": 0.4909, "step": 12829 }, { "epoch": 1.63, "grad_norm": 0.9355442755206614, "learning_rate": 8.50081461574484e-07, "loss": 0.5336, "step": 12830 }, { "epoch": 1.63, "grad_norm": 0.745157441636124, "learning_rate": 8.495061438695307e-07, "loss": 0.5362, "step": 12831 }, { "epoch": 1.63, "grad_norm": 0.7043129229747243, "learning_rate": 8.489310028378389e-07, "loss": 0.4815, "step": 12832 }, { "epoch": 1.63, "grad_norm": 0.5913881407473699, "learning_rate": 8.483560385038886e-07, "loss": 0.4419, "step": 12833 }, { "epoch": 1.64, "grad_norm": 0.5700974850112825, "learning_rate": 8.477812508921562e-07, "loss": 0.4395, "step": 12834 }, { "epoch": 1.64, "grad_norm": 0.7844419247040663, "learning_rate": 8.472066400271057e-07, "loss": 0.4708, "step": 12835 }, { "epoch": 1.64, "grad_norm": 0.6572035815933482, "learning_rate": 8.466322059331999e-07, "loss": 0.4632, "step": 12836 }, { "epoch": 1.64, "grad_norm": 0.7701778925628898, "learning_rate": 8.460579486348868e-07, "loss": 0.5083, "step": 12837 }, { "epoch": 1.64, "grad_norm": 0.6973827586784469, "learning_rate": 8.454838681566146e-07, "loss": 0.5211, "step": 12838 }, { "epoch": 1.64, "grad_norm": 0.5768861626243713, "learning_rate": 8.44909964522817e-07, "loss": 0.4259, "step": 12839 }, { "epoch": 1.64, "grad_norm": 0.7607118620868782, "learning_rate": 8.443362377579245e-07, "loss": 0.4899, "step": 12840 }, { "epoch": 1.64, "grad_norm": 0.6265361220161415, "learning_rate": 8.437626878863598e-07, "loss": 0.4122, "step": 12841 }, { "epoch": 1.64, "grad_norm": 0.7477777433963804, "learning_rate": 8.431893149325343e-07, "loss": 0.4899, "step": 12842 }, { "epoch": 1.64, "grad_norm": 0.6869519540111463, "learning_rate": 8.426161189208581e-07, "loss": 0.4428, "step": 12843 }, { "epoch": 1.64, "grad_norm": 0.7677117293715067, "learning_rate": 8.420430998757279e-07, "loss": 0.4701, "step": 12844 }, { "epoch": 1.64, "grad_norm": 0.7075050411049599, "learning_rate": 8.414702578215361e-07, "loss": 0.5389, "step": 12845 }, { "epoch": 1.64, "grad_norm": 0.7448650908698292, "learning_rate": 8.408975927826663e-07, "loss": 0.5139, "step": 12846 }, { "epoch": 1.64, "grad_norm": 0.7008683212373801, "learning_rate": 8.403251047834965e-07, "loss": 0.4864, "step": 12847 }, { "epoch": 1.64, "grad_norm": 0.7111780531939054, "learning_rate": 8.397527938483929e-07, "loss": 0.4968, "step": 12848 }, { "epoch": 1.64, "grad_norm": 0.797529350349301, "learning_rate": 8.391806600017205e-07, "loss": 0.4926, "step": 12849 }, { "epoch": 1.64, "grad_norm": 0.6266665967107208, "learning_rate": 8.386087032678297e-07, "loss": 0.495, "step": 12850 }, { "epoch": 1.64, "grad_norm": 1.0193495897698541, "learning_rate": 8.380369236710694e-07, "loss": 0.5066, "step": 12851 }, { "epoch": 1.64, "grad_norm": 0.5895970389097267, "learning_rate": 8.374653212357769e-07, "loss": 0.4509, "step": 12852 }, { "epoch": 1.64, "grad_norm": 0.5817780685763771, "learning_rate": 8.368938959862838e-07, "loss": 0.4521, "step": 12853 }, { "epoch": 1.64, "grad_norm": 0.7759389007825285, "learning_rate": 8.363226479469145e-07, "loss": 0.478, "step": 12854 }, { "epoch": 1.64, "grad_norm": 0.677640181055498, "learning_rate": 8.35751577141986e-07, "loss": 0.4666, "step": 12855 }, { "epoch": 1.64, "grad_norm": 0.7725153472215607, "learning_rate": 8.351806835958043e-07, "loss": 0.5095, "step": 12856 }, { "epoch": 1.64, "grad_norm": 0.626086672665873, "learning_rate": 8.346099673326735e-07, "loss": 0.482, "step": 12857 }, { "epoch": 1.64, "grad_norm": 0.6631699369518941, "learning_rate": 8.340394283768843e-07, "loss": 0.5305, "step": 12858 }, { "epoch": 1.64, "grad_norm": 0.6777669057123549, "learning_rate": 8.334690667527256e-07, "loss": 0.4503, "step": 12859 }, { "epoch": 1.64, "grad_norm": 0.741269701076752, "learning_rate": 8.328988824844731e-07, "loss": 0.4879, "step": 12860 }, { "epoch": 1.64, "grad_norm": 0.6706932524006863, "learning_rate": 8.323288755963999e-07, "loss": 0.4978, "step": 12861 }, { "epoch": 1.64, "grad_norm": 0.6336435898560219, "learning_rate": 8.317590461127678e-07, "loss": 0.4873, "step": 12862 }, { "epoch": 1.64, "grad_norm": 0.7429636917255024, "learning_rate": 8.311893940578342e-07, "loss": 0.5615, "step": 12863 }, { "epoch": 1.64, "grad_norm": 0.7221451375413287, "learning_rate": 8.306199194558451e-07, "loss": 0.4942, "step": 12864 }, { "epoch": 1.64, "grad_norm": 0.7324635931564546, "learning_rate": 8.300506223310428e-07, "loss": 0.5, "step": 12865 }, { "epoch": 1.64, "grad_norm": 0.8000750903983272, "learning_rate": 8.294815027076602e-07, "loss": 0.4757, "step": 12866 }, { "epoch": 1.64, "grad_norm": 0.6413808570680682, "learning_rate": 8.289125606099247e-07, "loss": 0.4692, "step": 12867 }, { "epoch": 1.64, "grad_norm": 0.7632768009524749, "learning_rate": 8.283437960620511e-07, "loss": 0.4936, "step": 12868 }, { "epoch": 1.64, "grad_norm": 0.7547520806085447, "learning_rate": 8.277752090882524e-07, "loss": 0.5063, "step": 12869 }, { "epoch": 1.64, "grad_norm": 0.8296325601629132, "learning_rate": 8.272067997127292e-07, "loss": 0.4771, "step": 12870 }, { "epoch": 1.64, "grad_norm": 0.7512712457675783, "learning_rate": 8.266385679596794e-07, "loss": 0.4572, "step": 12871 }, { "epoch": 1.64, "grad_norm": 0.8396910615985428, "learning_rate": 8.260705138532887e-07, "loss": 0.4841, "step": 12872 }, { "epoch": 1.64, "grad_norm": 0.8665885944414522, "learning_rate": 8.255026374177394e-07, "loss": 0.511, "step": 12873 }, { "epoch": 1.64, "grad_norm": 0.693758702312013, "learning_rate": 8.249349386772021e-07, "loss": 0.4539, "step": 12874 }, { "epoch": 1.64, "grad_norm": 0.5410122062806495, "learning_rate": 8.243674176558436e-07, "loss": 0.4465, "step": 12875 }, { "epoch": 1.64, "grad_norm": 0.6517541693282767, "learning_rate": 8.238000743778196e-07, "loss": 0.4926, "step": 12876 }, { "epoch": 1.64, "grad_norm": 0.7451910240311678, "learning_rate": 8.232329088672825e-07, "loss": 0.5305, "step": 12877 }, { "epoch": 1.64, "grad_norm": 0.6763759854661514, "learning_rate": 8.226659211483712e-07, "loss": 0.4511, "step": 12878 }, { "epoch": 1.64, "grad_norm": 0.7636087536503291, "learning_rate": 8.220991112452248e-07, "loss": 0.5503, "step": 12879 }, { "epoch": 1.64, "grad_norm": 0.8573305759706739, "learning_rate": 8.215324791819673e-07, "loss": 0.5363, "step": 12880 }, { "epoch": 1.64, "grad_norm": 0.559265205239676, "learning_rate": 8.209660249827218e-07, "loss": 0.4213, "step": 12881 }, { "epoch": 1.64, "grad_norm": 0.6091760963288426, "learning_rate": 8.203997486715959e-07, "loss": 0.4353, "step": 12882 }, { "epoch": 1.64, "grad_norm": 0.576915491444806, "learning_rate": 8.198336502726989e-07, "loss": 0.4745, "step": 12883 }, { "epoch": 1.64, "grad_norm": 0.6035516984441854, "learning_rate": 8.192677298101237e-07, "loss": 0.3979, "step": 12884 }, { "epoch": 1.64, "grad_norm": 0.5871552116189338, "learning_rate": 8.187019873079627e-07, "loss": 0.3986, "step": 12885 }, { "epoch": 1.64, "grad_norm": 0.5784852782138676, "learning_rate": 8.181364227902954e-07, "loss": 0.482, "step": 12886 }, { "epoch": 1.64, "grad_norm": 0.6259653658355467, "learning_rate": 8.175710362811989e-07, "loss": 0.4166, "step": 12887 }, { "epoch": 1.64, "grad_norm": 0.6200997179134496, "learning_rate": 8.170058278047366e-07, "loss": 0.471, "step": 12888 }, { "epoch": 1.64, "grad_norm": 0.6939817856816447, "learning_rate": 8.164407973849709e-07, "loss": 0.5251, "step": 12889 }, { "epoch": 1.64, "grad_norm": 0.6735406968827263, "learning_rate": 8.158759450459497e-07, "loss": 0.4794, "step": 12890 }, { "epoch": 1.64, "grad_norm": 0.7885523250429712, "learning_rate": 8.153112708117195e-07, "loss": 0.5068, "step": 12891 }, { "epoch": 1.64, "grad_norm": 0.5748156348178554, "learning_rate": 8.147467747063165e-07, "loss": 0.4089, "step": 12892 }, { "epoch": 1.64, "grad_norm": 0.6401032534583887, "learning_rate": 8.141824567537698e-07, "loss": 0.5027, "step": 12893 }, { "epoch": 1.64, "grad_norm": 0.6638303765445766, "learning_rate": 8.136183169780992e-07, "loss": 0.4302, "step": 12894 }, { "epoch": 1.64, "grad_norm": 0.8371077580433729, "learning_rate": 8.130543554033199e-07, "loss": 0.5774, "step": 12895 }, { "epoch": 1.64, "grad_norm": 0.7851743978011276, "learning_rate": 8.12490572053436e-07, "loss": 0.5602, "step": 12896 }, { "epoch": 1.64, "grad_norm": 0.907138751221361, "learning_rate": 8.119269669524482e-07, "loss": 0.501, "step": 12897 }, { "epoch": 1.64, "grad_norm": 0.6346975941397917, "learning_rate": 8.113635401243452e-07, "loss": 0.4505, "step": 12898 }, { "epoch": 1.64, "grad_norm": 0.5904696565381854, "learning_rate": 8.108002915931124e-07, "loss": 0.4932, "step": 12899 }, { "epoch": 1.64, "grad_norm": 0.7228923970334348, "learning_rate": 8.102372213827237e-07, "loss": 0.4929, "step": 12900 }, { "epoch": 1.64, "grad_norm": 0.7394545033473878, "learning_rate": 8.096743295171489e-07, "loss": 0.4394, "step": 12901 }, { "epoch": 1.64, "grad_norm": 0.6395912584032888, "learning_rate": 8.091116160203461e-07, "loss": 0.4829, "step": 12902 }, { "epoch": 1.64, "grad_norm": 0.8757979099769474, "learning_rate": 8.085490809162693e-07, "loss": 0.5015, "step": 12903 }, { "epoch": 1.64, "grad_norm": 0.6401509667574455, "learning_rate": 8.079867242288658e-07, "loss": 0.5056, "step": 12904 }, { "epoch": 1.64, "grad_norm": 0.6710721589399226, "learning_rate": 8.074245459820706e-07, "loss": 0.5005, "step": 12905 }, { "epoch": 1.64, "grad_norm": 0.6041915877655456, "learning_rate": 8.068625461998159e-07, "loss": 0.406, "step": 12906 }, { "epoch": 1.64, "grad_norm": 0.6124754154786862, "learning_rate": 8.063007249060223e-07, "loss": 0.4616, "step": 12907 }, { "epoch": 1.64, "grad_norm": 0.7405603273689039, "learning_rate": 8.05739082124607e-07, "loss": 0.4855, "step": 12908 }, { "epoch": 1.64, "grad_norm": 0.6723298812843324, "learning_rate": 8.051776178794746e-07, "loss": 0.4827, "step": 12909 }, { "epoch": 1.64, "grad_norm": 0.891111804038138, "learning_rate": 8.046163321945272e-07, "loss": 0.4905, "step": 12910 }, { "epoch": 1.64, "grad_norm": 0.6233705127502565, "learning_rate": 8.040552250936551e-07, "loss": 0.4662, "step": 12911 }, { "epoch": 1.64, "grad_norm": 0.6034813245452437, "learning_rate": 8.034942966007447e-07, "loss": 0.5266, "step": 12912 }, { "epoch": 1.65, "grad_norm": 0.763363457423129, "learning_rate": 8.029335467396715e-07, "loss": 0.5433, "step": 12913 }, { "epoch": 1.65, "grad_norm": 0.7228422510054776, "learning_rate": 8.023729755343046e-07, "loss": 0.5173, "step": 12914 }, { "epoch": 1.65, "grad_norm": 0.7265337413618699, "learning_rate": 8.018125830085067e-07, "loss": 0.4799, "step": 12915 }, { "epoch": 1.65, "grad_norm": 0.616463475400724, "learning_rate": 8.012523691861329e-07, "loss": 0.423, "step": 12916 }, { "epoch": 1.65, "grad_norm": 0.6769938747312553, "learning_rate": 8.006923340910272e-07, "loss": 0.5164, "step": 12917 }, { "epoch": 1.65, "grad_norm": 0.688362097141529, "learning_rate": 8.001324777470309e-07, "loss": 0.4999, "step": 12918 }, { "epoch": 1.65, "grad_norm": 0.7989264480778056, "learning_rate": 7.995728001779729e-07, "loss": 0.4795, "step": 12919 }, { "epoch": 1.65, "grad_norm": 1.3192145243020188, "learning_rate": 7.990133014076796e-07, "loss": 0.5026, "step": 12920 }, { "epoch": 1.65, "grad_norm": 0.649829606315557, "learning_rate": 7.98453981459964e-07, "loss": 0.4411, "step": 12921 }, { "epoch": 1.65, "grad_norm": 0.6444632438404193, "learning_rate": 7.978948403586378e-07, "loss": 0.4706, "step": 12922 }, { "epoch": 1.65, "grad_norm": 0.8467081272132845, "learning_rate": 7.973358781274987e-07, "loss": 0.5475, "step": 12923 }, { "epoch": 1.65, "grad_norm": 0.668668706742791, "learning_rate": 7.96777094790343e-07, "loss": 0.4091, "step": 12924 }, { "epoch": 1.65, "grad_norm": 0.5903267115077628, "learning_rate": 7.962184903709536e-07, "loss": 0.4275, "step": 12925 }, { "epoch": 1.65, "grad_norm": 0.6769226830781764, "learning_rate": 7.956600648931106e-07, "loss": 0.4815, "step": 12926 }, { "epoch": 1.65, "grad_norm": 0.6927791876330448, "learning_rate": 7.951018183805814e-07, "loss": 0.4473, "step": 12927 }, { "epoch": 1.65, "grad_norm": 0.602946518439064, "learning_rate": 7.945437508571329e-07, "loss": 0.4667, "step": 12928 }, { "epoch": 1.65, "grad_norm": 0.7335284665509962, "learning_rate": 7.939858623465174e-07, "loss": 0.4928, "step": 12929 }, { "epoch": 1.65, "grad_norm": 0.9970204396344293, "learning_rate": 7.934281528724841e-07, "loss": 0.462, "step": 12930 }, { "epoch": 1.65, "grad_norm": 0.5867913780950228, "learning_rate": 7.928706224587713e-07, "loss": 0.4069, "step": 12931 }, { "epoch": 1.65, "grad_norm": 0.6327843281415936, "learning_rate": 7.923132711291131e-07, "loss": 0.4184, "step": 12932 }, { "epoch": 1.65, "grad_norm": 0.6251252121386904, "learning_rate": 7.917560989072314e-07, "loss": 0.4516, "step": 12933 }, { "epoch": 1.65, "grad_norm": 0.5812190764951137, "learning_rate": 7.911991058168467e-07, "loss": 0.4596, "step": 12934 }, { "epoch": 1.65, "grad_norm": 0.619311031830165, "learning_rate": 7.906422918816653e-07, "loss": 0.5167, "step": 12935 }, { "epoch": 1.65, "grad_norm": 0.5953256612793433, "learning_rate": 7.900856571253917e-07, "loss": 0.4495, "step": 12936 }, { "epoch": 1.65, "grad_norm": 0.6694775640246189, "learning_rate": 7.895292015717171e-07, "loss": 0.4557, "step": 12937 }, { "epoch": 1.65, "grad_norm": 0.5757290052115833, "learning_rate": 7.889729252443312e-07, "loss": 0.4306, "step": 12938 }, { "epoch": 1.65, "grad_norm": 0.7185916281465337, "learning_rate": 7.884168281669103e-07, "loss": 0.5432, "step": 12939 }, { "epoch": 1.65, "grad_norm": 0.6574007004635614, "learning_rate": 7.878609103631263e-07, "loss": 0.5377, "step": 12940 }, { "epoch": 1.65, "grad_norm": 0.7279954507045194, "learning_rate": 7.873051718566433e-07, "loss": 0.4663, "step": 12941 }, { "epoch": 1.65, "grad_norm": 0.5583688511166641, "learning_rate": 7.867496126711188e-07, "loss": 0.4496, "step": 12942 }, { "epoch": 1.65, "grad_norm": 0.6792929551767777, "learning_rate": 7.86194232830198e-07, "loss": 0.5017, "step": 12943 }, { "epoch": 1.65, "grad_norm": 0.8457676215580413, "learning_rate": 7.856390323575253e-07, "loss": 0.5428, "step": 12944 }, { "epoch": 1.65, "grad_norm": 0.7996971489513465, "learning_rate": 7.850840112767299e-07, "loss": 0.4268, "step": 12945 }, { "epoch": 1.65, "grad_norm": 0.7310596144110078, "learning_rate": 7.845291696114405e-07, "loss": 0.5013, "step": 12946 }, { "epoch": 1.65, "grad_norm": 0.8241422068983943, "learning_rate": 7.839745073852728e-07, "loss": 0.5008, "step": 12947 }, { "epoch": 1.65, "grad_norm": 0.6176619276947708, "learning_rate": 7.834200246218387e-07, "loss": 0.5052, "step": 12948 }, { "epoch": 1.65, "grad_norm": 0.7580199344831685, "learning_rate": 7.828657213447388e-07, "loss": 0.5222, "step": 12949 }, { "epoch": 1.65, "grad_norm": 0.6049700433126319, "learning_rate": 7.823115975775702e-07, "loss": 0.4993, "step": 12950 }, { "epoch": 1.65, "grad_norm": 0.5503220972317298, "learning_rate": 7.81757653343918e-07, "loss": 0.4317, "step": 12951 }, { "epoch": 1.65, "grad_norm": 0.6285219109868797, "learning_rate": 7.812038886673628e-07, "loss": 0.413, "step": 12952 }, { "epoch": 1.65, "grad_norm": 0.6381661667250047, "learning_rate": 7.806503035714769e-07, "loss": 0.4883, "step": 12953 }, { "epoch": 1.65, "grad_norm": 0.5932464420473109, "learning_rate": 7.800968980798257e-07, "loss": 0.4756, "step": 12954 }, { "epoch": 1.65, "grad_norm": 0.7125045474061463, "learning_rate": 7.795436722159633e-07, "loss": 0.5415, "step": 12955 }, { "epoch": 1.65, "grad_norm": 0.8017041544020133, "learning_rate": 7.789906260034413e-07, "loss": 0.5526, "step": 12956 }, { "epoch": 1.65, "grad_norm": 0.7769187239567322, "learning_rate": 7.784377594657988e-07, "loss": 0.5996, "step": 12957 }, { "epoch": 1.65, "grad_norm": 0.7032674690617128, "learning_rate": 7.778850726265718e-07, "loss": 0.4297, "step": 12958 }, { "epoch": 1.65, "grad_norm": 0.6635092295049717, "learning_rate": 7.773325655092845e-07, "loss": 0.5123, "step": 12959 }, { "epoch": 1.65, "grad_norm": 0.7287903732399733, "learning_rate": 7.767802381374573e-07, "loss": 0.5001, "step": 12960 }, { "epoch": 1.65, "grad_norm": 0.8815515419851914, "learning_rate": 7.762280905345981e-07, "loss": 0.4753, "step": 12961 }, { "epoch": 1.65, "grad_norm": 0.852117077884072, "learning_rate": 7.756761227242137e-07, "loss": 0.5487, "step": 12962 }, { "epoch": 1.65, "grad_norm": 0.740119631123111, "learning_rate": 7.751243347297959e-07, "loss": 0.5316, "step": 12963 }, { "epoch": 1.65, "grad_norm": 0.6652364159540397, "learning_rate": 7.745727265748348e-07, "loss": 0.4258, "step": 12964 }, { "epoch": 1.65, "grad_norm": 0.6741275602398379, "learning_rate": 7.740212982828116e-07, "loss": 0.4428, "step": 12965 }, { "epoch": 1.65, "grad_norm": 0.6563515682871657, "learning_rate": 7.734700498771963e-07, "loss": 0.4921, "step": 12966 }, { "epoch": 1.65, "grad_norm": 0.7906609466563777, "learning_rate": 7.729189813814559e-07, "loss": 0.4873, "step": 12967 }, { "epoch": 1.65, "grad_norm": 0.6544714745971126, "learning_rate": 7.723680928190453e-07, "loss": 0.4208, "step": 12968 }, { "epoch": 1.65, "grad_norm": 0.6061011837668667, "learning_rate": 7.718173842134163e-07, "loss": 0.4228, "step": 12969 }, { "epoch": 1.65, "grad_norm": 0.6166496238355582, "learning_rate": 7.712668555880093e-07, "loss": 0.4799, "step": 12970 }, { "epoch": 1.65, "grad_norm": 0.7788565357494753, "learning_rate": 7.707165069662598e-07, "loss": 0.5457, "step": 12971 }, { "epoch": 1.65, "grad_norm": 0.8601077719618896, "learning_rate": 7.70166338371593e-07, "loss": 0.5506, "step": 12972 }, { "epoch": 1.65, "grad_norm": 0.7296875474459261, "learning_rate": 7.696163498274295e-07, "loss": 0.4908, "step": 12973 }, { "epoch": 1.65, "grad_norm": 0.6565017686632247, "learning_rate": 7.69066541357179e-07, "loss": 0.492, "step": 12974 }, { "epoch": 1.65, "grad_norm": 0.7101910291654338, "learning_rate": 7.685169129842462e-07, "loss": 0.4957, "step": 12975 }, { "epoch": 1.65, "grad_norm": 0.6221243896230317, "learning_rate": 7.679674647320257e-07, "loss": 0.4326, "step": 12976 }, { "epoch": 1.65, "grad_norm": 0.6245040824059346, "learning_rate": 7.674181966239064e-07, "loss": 0.4989, "step": 12977 }, { "epoch": 1.65, "grad_norm": 0.8099110302658875, "learning_rate": 7.668691086832692e-07, "loss": 0.5172, "step": 12978 }, { "epoch": 1.65, "grad_norm": 0.6895925898459777, "learning_rate": 7.66320200933488e-07, "loss": 0.481, "step": 12979 }, { "epoch": 1.65, "grad_norm": 0.5730941122861929, "learning_rate": 7.65771473397926e-07, "loss": 0.4303, "step": 12980 }, { "epoch": 1.65, "grad_norm": 0.8441742349722415, "learning_rate": 7.652229260999427e-07, "loss": 0.564, "step": 12981 }, { "epoch": 1.65, "grad_norm": 0.7304518466611672, "learning_rate": 7.646745590628856e-07, "loss": 0.4971, "step": 12982 }, { "epoch": 1.65, "grad_norm": 0.6459562460742742, "learning_rate": 7.641263723101e-07, "loss": 0.453, "step": 12983 }, { "epoch": 1.65, "grad_norm": 0.7089570924876915, "learning_rate": 7.635783658649176e-07, "loss": 0.4577, "step": 12984 }, { "epoch": 1.65, "grad_norm": 0.7683751673179793, "learning_rate": 7.630305397506671e-07, "loss": 0.4874, "step": 12985 }, { "epoch": 1.65, "grad_norm": 0.6027885918993885, "learning_rate": 7.624828939906664e-07, "loss": 0.4564, "step": 12986 }, { "epoch": 1.65, "grad_norm": 0.7191731710142943, "learning_rate": 7.619354286082287e-07, "loss": 0.4918, "step": 12987 }, { "epoch": 1.65, "grad_norm": 0.6210936531340976, "learning_rate": 7.613881436266563e-07, "loss": 0.4451, "step": 12988 }, { "epoch": 1.65, "grad_norm": 0.6691189661530534, "learning_rate": 7.60841039069245e-07, "loss": 0.4509, "step": 12989 }, { "epoch": 1.65, "grad_norm": 0.5968869869341031, "learning_rate": 7.602941149592852e-07, "loss": 0.4289, "step": 12990 }, { "epoch": 1.66, "grad_norm": 0.5550802616164513, "learning_rate": 7.59747371320057e-07, "loss": 0.4399, "step": 12991 }, { "epoch": 1.66, "grad_norm": 0.6319581237646367, "learning_rate": 7.592008081748331e-07, "loss": 0.481, "step": 12992 }, { "epoch": 1.66, "grad_norm": 0.7370054690493896, "learning_rate": 7.586544255468797e-07, "loss": 0.5194, "step": 12993 }, { "epoch": 1.66, "grad_norm": 0.7288560160669014, "learning_rate": 7.581082234594528e-07, "loss": 0.5233, "step": 12994 }, { "epoch": 1.66, "grad_norm": 0.7694330191991845, "learning_rate": 7.575622019358053e-07, "loss": 0.4878, "step": 12995 }, { "epoch": 1.66, "grad_norm": 0.6626814634146486, "learning_rate": 7.570163609991765e-07, "loss": 0.492, "step": 12996 }, { "epoch": 1.66, "grad_norm": 0.665603531910884, "learning_rate": 7.564707006728033e-07, "loss": 0.4382, "step": 12997 }, { "epoch": 1.66, "grad_norm": 0.6275202889552421, "learning_rate": 7.559252209799117e-07, "loss": 0.4899, "step": 12998 }, { "epoch": 1.66, "grad_norm": 0.6114267621216525, "learning_rate": 7.553799219437219e-07, "loss": 0.4667, "step": 12999 }, { "epoch": 1.66, "grad_norm": 0.7903692029872443, "learning_rate": 7.548348035874442e-07, "loss": 0.5202, "step": 13000 }, { "epoch": 1.66, "grad_norm": 0.7346821114520977, "learning_rate": 7.54289865934284e-07, "loss": 0.5377, "step": 13001 }, { "epoch": 1.66, "grad_norm": 0.731185090498993, "learning_rate": 7.53745109007435e-07, "loss": 0.4859, "step": 13002 }, { "epoch": 1.66, "grad_norm": 0.6129004248114427, "learning_rate": 7.532005328300895e-07, "loss": 0.4419, "step": 13003 }, { "epoch": 1.66, "grad_norm": 0.7617171081209032, "learning_rate": 7.526561374254254e-07, "loss": 0.5702, "step": 13004 }, { "epoch": 1.66, "grad_norm": 0.7329983749525989, "learning_rate": 7.521119228166184e-07, "loss": 0.5231, "step": 13005 }, { "epoch": 1.66, "grad_norm": 0.7468514018779087, "learning_rate": 7.515678890268313e-07, "loss": 0.4472, "step": 13006 }, { "epoch": 1.66, "grad_norm": 0.8052195471554715, "learning_rate": 7.510240360792243e-07, "loss": 0.5443, "step": 13007 }, { "epoch": 1.66, "grad_norm": 1.065148545975146, "learning_rate": 7.504803639969449e-07, "loss": 0.5274, "step": 13008 }, { "epoch": 1.66, "grad_norm": 0.559498818441496, "learning_rate": 7.49936872803138e-07, "loss": 0.4246, "step": 13009 }, { "epoch": 1.66, "grad_norm": 0.8772362298681473, "learning_rate": 7.493935625209364e-07, "loss": 0.4888, "step": 13010 }, { "epoch": 1.66, "grad_norm": 0.5593305478998963, "learning_rate": 7.488504331734687e-07, "loss": 0.4192, "step": 13011 }, { "epoch": 1.66, "grad_norm": 0.6895720604527168, "learning_rate": 7.483074847838523e-07, "loss": 0.4659, "step": 13012 }, { "epoch": 1.66, "grad_norm": 0.8371784191971613, "learning_rate": 7.477647173752012e-07, "loss": 0.508, "step": 13013 }, { "epoch": 1.66, "grad_norm": 0.569786026235684, "learning_rate": 7.472221309706163e-07, "loss": 0.3731, "step": 13014 }, { "epoch": 1.66, "grad_norm": 0.6401857469879865, "learning_rate": 7.466797255931957e-07, "loss": 0.52, "step": 13015 }, { "epoch": 1.66, "grad_norm": 0.5783988317835178, "learning_rate": 7.461375012660271e-07, "loss": 0.4731, "step": 13016 }, { "epoch": 1.66, "grad_norm": 0.8616613359363801, "learning_rate": 7.455954580121932e-07, "loss": 0.5411, "step": 13017 }, { "epoch": 1.66, "grad_norm": 0.7458071852809285, "learning_rate": 7.450535958547639e-07, "loss": 0.5068, "step": 13018 }, { "epoch": 1.66, "grad_norm": 0.6668240667469754, "learning_rate": 7.445119148168079e-07, "loss": 0.4866, "step": 13019 }, { "epoch": 1.66, "grad_norm": 0.609264915145959, "learning_rate": 7.439704149213795e-07, "loss": 0.4657, "step": 13020 }, { "epoch": 1.66, "grad_norm": 0.6610017803283099, "learning_rate": 7.434290961915313e-07, "loss": 0.4286, "step": 13021 }, { "epoch": 1.66, "grad_norm": 0.6932479392608297, "learning_rate": 7.428879586503041e-07, "loss": 0.4664, "step": 13022 }, { "epoch": 1.66, "grad_norm": 0.6948995144344969, "learning_rate": 7.423470023207329e-07, "loss": 0.4536, "step": 13023 }, { "epoch": 1.66, "grad_norm": 0.6612280095015978, "learning_rate": 7.418062272258436e-07, "loss": 0.4342, "step": 13024 }, { "epoch": 1.66, "grad_norm": 0.6851707634442115, "learning_rate": 7.41265633388657e-07, "loss": 0.4765, "step": 13025 }, { "epoch": 1.66, "grad_norm": 0.7470565971506619, "learning_rate": 7.407252208321824e-07, "loss": 0.5595, "step": 13026 }, { "epoch": 1.66, "grad_norm": 0.6166934232491137, "learning_rate": 7.401849895794244e-07, "loss": 0.4695, "step": 13027 }, { "epoch": 1.66, "grad_norm": 0.6614334506855875, "learning_rate": 7.3964493965338e-07, "loss": 0.505, "step": 13028 }, { "epoch": 1.66, "grad_norm": 0.7556927206841374, "learning_rate": 7.391050710770353e-07, "loss": 0.4737, "step": 13029 }, { "epoch": 1.66, "grad_norm": 0.5913442058422926, "learning_rate": 7.38565383873373e-07, "loss": 0.4327, "step": 13030 }, { "epoch": 1.66, "grad_norm": 0.8562509298863573, "learning_rate": 7.380258780653638e-07, "loss": 0.5219, "step": 13031 }, { "epoch": 1.66, "grad_norm": 0.7771387694218233, "learning_rate": 7.374865536759745e-07, "loss": 0.4819, "step": 13032 }, { "epoch": 1.66, "grad_norm": 0.6183853055190347, "learning_rate": 7.369474107281605e-07, "loss": 0.5023, "step": 13033 }, { "epoch": 1.66, "grad_norm": 0.8297526349446757, "learning_rate": 7.364084492448736e-07, "loss": 0.5136, "step": 13034 }, { "epoch": 1.66, "grad_norm": 0.6822452527641643, "learning_rate": 7.35869669249053e-07, "loss": 0.4479, "step": 13035 }, { "epoch": 1.66, "grad_norm": 0.6817127891006668, "learning_rate": 7.353310707636363e-07, "loss": 0.4084, "step": 13036 }, { "epoch": 1.66, "grad_norm": 0.7125628455613203, "learning_rate": 7.347926538115463e-07, "loss": 0.4519, "step": 13037 }, { "epoch": 1.66, "grad_norm": 0.5770709058856145, "learning_rate": 7.342544184157036e-07, "loss": 0.4339, "step": 13038 }, { "epoch": 1.66, "grad_norm": 0.6341578174651968, "learning_rate": 7.337163645990187e-07, "loss": 0.4442, "step": 13039 }, { "epoch": 1.66, "grad_norm": 0.563182040465081, "learning_rate": 7.331784923843965e-07, "loss": 0.4063, "step": 13040 }, { "epoch": 1.66, "grad_norm": 0.6058504785463513, "learning_rate": 7.326408017947295e-07, "loss": 0.4473, "step": 13041 }, { "epoch": 1.66, "grad_norm": 0.646655861536073, "learning_rate": 7.321032928529087e-07, "loss": 0.4729, "step": 13042 }, { "epoch": 1.66, "grad_norm": 0.6635790572690793, "learning_rate": 7.315659655818108e-07, "loss": 0.444, "step": 13043 }, { "epoch": 1.66, "grad_norm": 0.7742992560269968, "learning_rate": 7.310288200043114e-07, "loss": 0.4826, "step": 13044 }, { "epoch": 1.66, "grad_norm": 0.6851968224279334, "learning_rate": 7.304918561432723e-07, "loss": 0.4736, "step": 13045 }, { "epoch": 1.66, "grad_norm": 0.6680179913422574, "learning_rate": 7.299550740215522e-07, "loss": 0.4407, "step": 13046 }, { "epoch": 1.66, "grad_norm": 0.7142244195542182, "learning_rate": 7.294184736619985e-07, "loss": 0.4687, "step": 13047 }, { "epoch": 1.66, "grad_norm": 0.5620700511748968, "learning_rate": 7.288820550874548e-07, "loss": 0.4412, "step": 13048 }, { "epoch": 1.66, "grad_norm": 0.9477037343053843, "learning_rate": 7.283458183207526e-07, "loss": 0.5674, "step": 13049 }, { "epoch": 1.66, "grad_norm": 0.9224315071245608, "learning_rate": 7.278097633847192e-07, "loss": 0.5528, "step": 13050 }, { "epoch": 1.66, "grad_norm": 0.7424255592243945, "learning_rate": 7.272738903021704e-07, "loss": 0.4754, "step": 13051 }, { "epoch": 1.66, "grad_norm": 0.6814233177031853, "learning_rate": 7.267381990959205e-07, "loss": 0.5611, "step": 13052 }, { "epoch": 1.66, "grad_norm": 0.6492487699835662, "learning_rate": 7.262026897887692e-07, "loss": 0.4487, "step": 13053 }, { "epoch": 1.66, "grad_norm": 0.5486812669352463, "learning_rate": 7.256673624035132e-07, "loss": 0.427, "step": 13054 }, { "epoch": 1.66, "grad_norm": 0.6314034864550704, "learning_rate": 7.251322169629377e-07, "loss": 0.4779, "step": 13055 }, { "epoch": 1.66, "grad_norm": 0.8123776983449972, "learning_rate": 7.245972534898238e-07, "loss": 0.5425, "step": 13056 }, { "epoch": 1.66, "grad_norm": 0.7270194842595458, "learning_rate": 7.240624720069423e-07, "loss": 0.554, "step": 13057 }, { "epoch": 1.66, "grad_norm": 0.555608275370568, "learning_rate": 7.235278725370581e-07, "loss": 0.4316, "step": 13058 }, { "epoch": 1.66, "grad_norm": 0.6028605756879111, "learning_rate": 7.229934551029255e-07, "loss": 0.4497, "step": 13059 }, { "epoch": 1.66, "grad_norm": 0.6407792965939596, "learning_rate": 7.224592197272956e-07, "loss": 0.4376, "step": 13060 }, { "epoch": 1.66, "grad_norm": 0.6399655644398655, "learning_rate": 7.219251664329063e-07, "loss": 0.4526, "step": 13061 }, { "epoch": 1.66, "grad_norm": 0.6129655474952622, "learning_rate": 7.213912952424934e-07, "loss": 0.4726, "step": 13062 }, { "epoch": 1.66, "grad_norm": 0.5465020328331643, "learning_rate": 7.208576061787786e-07, "loss": 0.4248, "step": 13063 }, { "epoch": 1.66, "grad_norm": 0.606850040016243, "learning_rate": 7.203240992644822e-07, "loss": 0.4704, "step": 13064 }, { "epoch": 1.66, "grad_norm": 0.5792943994319595, "learning_rate": 7.197907745223126e-07, "loss": 0.4436, "step": 13065 }, { "epoch": 1.66, "grad_norm": 0.7508050048661842, "learning_rate": 7.192576319749733e-07, "loss": 0.5107, "step": 13066 }, { "epoch": 1.66, "grad_norm": 0.8036669873473785, "learning_rate": 7.187246716451562e-07, "loss": 0.5118, "step": 13067 }, { "epoch": 1.66, "grad_norm": 0.6136649009238249, "learning_rate": 7.181918935555498e-07, "loss": 0.4452, "step": 13068 }, { "epoch": 1.66, "grad_norm": 0.7567749003962727, "learning_rate": 7.176592977288311e-07, "loss": 0.5207, "step": 13069 }, { "epoch": 1.67, "grad_norm": 0.7850429780724448, "learning_rate": 7.171268841876727e-07, "loss": 0.5569, "step": 13070 }, { "epoch": 1.67, "grad_norm": 0.6604306471827248, "learning_rate": 7.165946529547352e-07, "loss": 0.443, "step": 13071 }, { "epoch": 1.67, "grad_norm": 0.6015681024243826, "learning_rate": 7.160626040526769e-07, "loss": 0.4057, "step": 13072 }, { "epoch": 1.67, "grad_norm": 0.7113670092806605, "learning_rate": 7.155307375041432e-07, "loss": 0.4232, "step": 13073 }, { "epoch": 1.67, "grad_norm": 0.6972354350735681, "learning_rate": 7.149990533317758e-07, "loss": 0.4049, "step": 13074 }, { "epoch": 1.67, "grad_norm": 0.6877848724440283, "learning_rate": 7.144675515582045e-07, "loss": 0.4705, "step": 13075 }, { "epoch": 1.67, "grad_norm": 0.7305208560845925, "learning_rate": 7.13936232206055e-07, "loss": 0.4815, "step": 13076 }, { "epoch": 1.67, "grad_norm": 0.5987356107581915, "learning_rate": 7.134050952979438e-07, "loss": 0.4201, "step": 13077 }, { "epoch": 1.67, "grad_norm": 0.6258357902545607, "learning_rate": 7.128741408564804e-07, "loss": 0.4517, "step": 13078 }, { "epoch": 1.67, "grad_norm": 0.7569671739175342, "learning_rate": 7.123433689042647e-07, "loss": 0.5336, "step": 13079 }, { "epoch": 1.67, "grad_norm": 0.672071347817186, "learning_rate": 7.118127794638912e-07, "loss": 0.46, "step": 13080 }, { "epoch": 1.67, "grad_norm": 1.3821295580123805, "learning_rate": 7.112823725579432e-07, "loss": 0.512, "step": 13081 }, { "epoch": 1.67, "grad_norm": 0.6189666307377367, "learning_rate": 7.107521482090012e-07, "loss": 0.4758, "step": 13082 }, { "epoch": 1.67, "grad_norm": 0.6989153900805563, "learning_rate": 7.102221064396325e-07, "loss": 0.4215, "step": 13083 }, { "epoch": 1.67, "grad_norm": 0.5546721771517976, "learning_rate": 7.096922472724022e-07, "loss": 0.4439, "step": 13084 }, { "epoch": 1.67, "grad_norm": 0.6230653923101679, "learning_rate": 7.091625707298616e-07, "loss": 0.4702, "step": 13085 }, { "epoch": 1.67, "grad_norm": 0.7276643413376166, "learning_rate": 7.086330768345601e-07, "loss": 0.5207, "step": 13086 }, { "epoch": 1.67, "grad_norm": 0.8510642448108363, "learning_rate": 7.081037656090339e-07, "loss": 0.5136, "step": 13087 }, { "epoch": 1.67, "grad_norm": 0.7478444288255979, "learning_rate": 7.075746370758152e-07, "loss": 0.4879, "step": 13088 }, { "epoch": 1.67, "grad_norm": 0.773402832610657, "learning_rate": 7.070456912574292e-07, "loss": 0.4936, "step": 13089 }, { "epoch": 1.67, "grad_norm": 0.8679934593854997, "learning_rate": 7.065169281763884e-07, "loss": 0.4637, "step": 13090 }, { "epoch": 1.67, "grad_norm": 0.6176333741315375, "learning_rate": 7.059883478552032e-07, "loss": 0.4633, "step": 13091 }, { "epoch": 1.67, "grad_norm": 0.799863799275152, "learning_rate": 7.054599503163712e-07, "loss": 0.5134, "step": 13092 }, { "epoch": 1.67, "grad_norm": 0.6194353155585677, "learning_rate": 7.049317355823871e-07, "loss": 0.4629, "step": 13093 }, { "epoch": 1.67, "grad_norm": 0.700926452304429, "learning_rate": 7.044037036757323e-07, "loss": 0.5148, "step": 13094 }, { "epoch": 1.67, "grad_norm": 0.6380438169448959, "learning_rate": 7.038758546188868e-07, "loss": 0.4513, "step": 13095 }, { "epoch": 1.67, "grad_norm": 0.5827345597817398, "learning_rate": 7.033481884343162e-07, "loss": 0.4359, "step": 13096 }, { "epoch": 1.67, "grad_norm": 0.7226286784706062, "learning_rate": 7.028207051444841e-07, "loss": 0.5085, "step": 13097 }, { "epoch": 1.67, "grad_norm": 0.7475835534424258, "learning_rate": 7.022934047718416e-07, "loss": 0.5389, "step": 13098 }, { "epoch": 1.67, "grad_norm": 0.7241048837956097, "learning_rate": 7.01766287338837e-07, "loss": 0.4968, "step": 13099 }, { "epoch": 1.67, "grad_norm": 0.7214915759088537, "learning_rate": 7.012393528679046e-07, "loss": 0.4999, "step": 13100 }, { "epoch": 1.67, "grad_norm": 0.7016263938970206, "learning_rate": 7.00712601381477e-07, "loss": 0.5373, "step": 13101 }, { "epoch": 1.67, "grad_norm": 0.9306901183470705, "learning_rate": 7.001860329019744e-07, "loss": 0.5565, "step": 13102 }, { "epoch": 1.67, "grad_norm": 0.6693246947816341, "learning_rate": 6.996596474518136e-07, "loss": 0.5061, "step": 13103 }, { "epoch": 1.67, "grad_norm": 0.7172448593490653, "learning_rate": 6.991334450533987e-07, "loss": 0.4848, "step": 13104 }, { "epoch": 1.67, "grad_norm": 0.7642974829582603, "learning_rate": 6.986074257291303e-07, "loss": 0.5196, "step": 13105 }, { "epoch": 1.67, "grad_norm": 0.7308586566057802, "learning_rate": 6.980815895013976e-07, "loss": 0.5219, "step": 13106 }, { "epoch": 1.67, "grad_norm": 0.9302772261948895, "learning_rate": 6.97555936392586e-07, "loss": 0.5135, "step": 13107 }, { "epoch": 1.67, "grad_norm": 0.6425147422943809, "learning_rate": 6.970304664250682e-07, "loss": 0.5179, "step": 13108 }, { "epoch": 1.67, "grad_norm": 1.0973343821496306, "learning_rate": 6.965051796212141e-07, "loss": 0.5001, "step": 13109 }, { "epoch": 1.67, "grad_norm": 0.519752104132932, "learning_rate": 6.959800760033814e-07, "loss": 0.3951, "step": 13110 }, { "epoch": 1.67, "grad_norm": 0.6421453261188922, "learning_rate": 6.954551555939243e-07, "loss": 0.464, "step": 13111 }, { "epoch": 1.67, "grad_norm": 0.6727224502816528, "learning_rate": 6.949304184151851e-07, "loss": 0.5036, "step": 13112 }, { "epoch": 1.67, "grad_norm": 0.7430951673481717, "learning_rate": 6.944058644895007e-07, "loss": 0.5453, "step": 13113 }, { "epoch": 1.67, "grad_norm": 1.084635567362419, "learning_rate": 6.938814938392003e-07, "loss": 0.5037, "step": 13114 }, { "epoch": 1.67, "grad_norm": 0.7868495046687567, "learning_rate": 6.933573064866045e-07, "loss": 0.4638, "step": 13115 }, { "epoch": 1.67, "grad_norm": 0.7951499758851783, "learning_rate": 6.928333024540257e-07, "loss": 0.5246, "step": 13116 }, { "epoch": 1.67, "grad_norm": 0.6394009454253002, "learning_rate": 6.923094817637705e-07, "loss": 0.4834, "step": 13117 }, { "epoch": 1.67, "grad_norm": 0.7399179831260733, "learning_rate": 6.917858444381342e-07, "loss": 0.5679, "step": 13118 }, { "epoch": 1.67, "grad_norm": 0.8441815577615446, "learning_rate": 6.912623904994087e-07, "loss": 0.55, "step": 13119 }, { "epoch": 1.67, "grad_norm": 0.772329656550102, "learning_rate": 6.907391199698727e-07, "loss": 0.4944, "step": 13120 }, { "epoch": 1.67, "grad_norm": 0.6067127249468892, "learning_rate": 6.902160328718038e-07, "loss": 0.4899, "step": 13121 }, { "epoch": 1.67, "grad_norm": 0.6745129727388588, "learning_rate": 6.896931292274644e-07, "loss": 0.4187, "step": 13122 }, { "epoch": 1.67, "grad_norm": 0.6547870700840459, "learning_rate": 6.891704090591167e-07, "loss": 0.4775, "step": 13123 }, { "epoch": 1.67, "grad_norm": 0.8493526298592805, "learning_rate": 6.886478723890072e-07, "loss": 0.5682, "step": 13124 }, { "epoch": 1.67, "grad_norm": 0.7190134778685141, "learning_rate": 6.881255192393827e-07, "loss": 0.5317, "step": 13125 }, { "epoch": 1.67, "grad_norm": 0.6714029643099239, "learning_rate": 6.87603349632473e-07, "loss": 0.4769, "step": 13126 }, { "epoch": 1.67, "grad_norm": 0.6700021730109976, "learning_rate": 6.87081363590511e-07, "loss": 0.457, "step": 13127 }, { "epoch": 1.67, "grad_norm": 0.6555147999065288, "learning_rate": 6.865595611357123e-07, "loss": 0.4563, "step": 13128 }, { "epoch": 1.67, "grad_norm": 0.5942512107662891, "learning_rate": 6.860379422902902e-07, "loss": 0.3941, "step": 13129 }, { "epoch": 1.67, "grad_norm": 0.6608242915456689, "learning_rate": 6.855165070764458e-07, "loss": 0.5388, "step": 13130 }, { "epoch": 1.67, "grad_norm": 0.6666543101531537, "learning_rate": 6.849952555163785e-07, "loss": 0.489, "step": 13131 }, { "epoch": 1.67, "grad_norm": 0.6678570729046618, "learning_rate": 6.844741876322724e-07, "loss": 0.4636, "step": 13132 }, { "epoch": 1.67, "grad_norm": 0.7388768678794377, "learning_rate": 6.839533034463113e-07, "loss": 0.5144, "step": 13133 }, { "epoch": 1.67, "grad_norm": 0.6412591660893533, "learning_rate": 6.834326029806654e-07, "loss": 0.4765, "step": 13134 }, { "epoch": 1.67, "grad_norm": 0.7353588921990595, "learning_rate": 6.829120862575001e-07, "loss": 0.5348, "step": 13135 }, { "epoch": 1.67, "grad_norm": 0.7856533306606929, "learning_rate": 6.82391753298971e-07, "loss": 0.4872, "step": 13136 }, { "epoch": 1.67, "grad_norm": 0.5405039815646574, "learning_rate": 6.818716041272289e-07, "loss": 0.4383, "step": 13137 }, { "epoch": 1.67, "grad_norm": 0.6522878911007892, "learning_rate": 6.813516387644131e-07, "loss": 0.448, "step": 13138 }, { "epoch": 1.67, "grad_norm": 0.7470348029203028, "learning_rate": 6.808318572326572e-07, "loss": 0.4921, "step": 13139 }, { "epoch": 1.67, "grad_norm": 0.8320733136508066, "learning_rate": 6.803122595540873e-07, "loss": 0.5428, "step": 13140 }, { "epoch": 1.67, "grad_norm": 0.9853707855903651, "learning_rate": 6.797928457508218e-07, "loss": 0.4503, "step": 13141 }, { "epoch": 1.67, "grad_norm": 0.6178736172870148, "learning_rate": 6.792736158449681e-07, "loss": 0.4437, "step": 13142 }, { "epoch": 1.67, "grad_norm": 0.6136913280372429, "learning_rate": 6.787545698586312e-07, "loss": 0.4115, "step": 13143 }, { "epoch": 1.67, "grad_norm": 0.656775270773385, "learning_rate": 6.782357078139018e-07, "loss": 0.5015, "step": 13144 }, { "epoch": 1.67, "grad_norm": 0.7112774567477834, "learning_rate": 6.777170297328695e-07, "loss": 0.5083, "step": 13145 }, { "epoch": 1.67, "grad_norm": 0.6403312589939059, "learning_rate": 6.771985356376098e-07, "loss": 0.4736, "step": 13146 }, { "epoch": 1.67, "grad_norm": 0.7064117265216866, "learning_rate": 6.766802255501958e-07, "loss": 0.5299, "step": 13147 }, { "epoch": 1.68, "grad_norm": 0.6882748598985095, "learning_rate": 6.761620994926882e-07, "loss": 0.5267, "step": 13148 }, { "epoch": 1.68, "grad_norm": 0.6911411526523836, "learning_rate": 6.756441574871441e-07, "loss": 0.4923, "step": 13149 }, { "epoch": 1.68, "grad_norm": 0.5893606656272917, "learning_rate": 6.75126399555609e-07, "loss": 0.4623, "step": 13150 }, { "epoch": 1.68, "grad_norm": 0.7365390588192582, "learning_rate": 6.746088257201222e-07, "loss": 0.5026, "step": 13151 }, { "epoch": 1.68, "grad_norm": 0.6873638467181916, "learning_rate": 6.740914360027167e-07, "loss": 0.485, "step": 13152 }, { "epoch": 1.68, "grad_norm": 0.7386456006987496, "learning_rate": 6.735742304254145e-07, "loss": 0.4491, "step": 13153 }, { "epoch": 1.68, "grad_norm": 0.7447718976167207, "learning_rate": 6.730572090102327e-07, "loss": 0.5141, "step": 13154 }, { "epoch": 1.68, "grad_norm": 0.6472424754499749, "learning_rate": 6.725403717791778e-07, "loss": 0.4714, "step": 13155 }, { "epoch": 1.68, "grad_norm": 0.7434777205440385, "learning_rate": 6.720237187542517e-07, "loss": 0.5264, "step": 13156 }, { "epoch": 1.68, "grad_norm": 0.644513795047483, "learning_rate": 6.715072499574443e-07, "loss": 0.4733, "step": 13157 }, { "epoch": 1.68, "grad_norm": 0.8276411452713182, "learning_rate": 6.709909654107427e-07, "loss": 0.4412, "step": 13158 }, { "epoch": 1.68, "grad_norm": 0.777784341617413, "learning_rate": 6.704748651361214e-07, "loss": 0.4975, "step": 13159 }, { "epoch": 1.68, "grad_norm": 0.5898627885941914, "learning_rate": 6.699589491555508e-07, "loss": 0.4665, "step": 13160 }, { "epoch": 1.68, "grad_norm": 0.6977464905483004, "learning_rate": 6.694432174909893e-07, "loss": 0.4726, "step": 13161 }, { "epoch": 1.68, "grad_norm": 0.756634661728968, "learning_rate": 6.689276701643921e-07, "loss": 0.4917, "step": 13162 }, { "epoch": 1.68, "grad_norm": 0.6158825466042191, "learning_rate": 6.684123071977039e-07, "loss": 0.4381, "step": 13163 }, { "epoch": 1.68, "grad_norm": 0.6172999933623338, "learning_rate": 6.678971286128627e-07, "loss": 0.4359, "step": 13164 }, { "epoch": 1.68, "grad_norm": 0.6258287417856437, "learning_rate": 6.673821344317966e-07, "loss": 0.474, "step": 13165 }, { "epoch": 1.68, "grad_norm": 0.5634693880775815, "learning_rate": 6.66867324676429e-07, "loss": 0.4076, "step": 13166 }, { "epoch": 1.68, "grad_norm": 0.6457680196993354, "learning_rate": 6.663526993686714e-07, "loss": 0.4381, "step": 13167 }, { "epoch": 1.68, "grad_norm": 0.5985818092107529, "learning_rate": 6.658382585304318e-07, "loss": 0.4655, "step": 13168 }, { "epoch": 1.68, "grad_norm": 0.6395727199955131, "learning_rate": 6.653240021836071e-07, "loss": 0.4494, "step": 13169 }, { "epoch": 1.68, "grad_norm": 0.5725269146216531, "learning_rate": 6.648099303500888e-07, "loss": 0.4668, "step": 13170 }, { "epoch": 1.68, "grad_norm": 0.820141156481712, "learning_rate": 6.642960430517575e-07, "loss": 0.5501, "step": 13171 }, { "epoch": 1.68, "grad_norm": 0.8388650540680296, "learning_rate": 6.637823403104899e-07, "loss": 0.5081, "step": 13172 }, { "epoch": 1.68, "grad_norm": 0.6626023647622545, "learning_rate": 6.632688221481498e-07, "loss": 0.437, "step": 13173 }, { "epoch": 1.68, "grad_norm": 0.618150108265403, "learning_rate": 6.627554885865995e-07, "loss": 0.4534, "step": 13174 }, { "epoch": 1.68, "grad_norm": 0.7261056554587275, "learning_rate": 6.622423396476857e-07, "loss": 0.4795, "step": 13175 }, { "epoch": 1.68, "grad_norm": 0.7629884836633506, "learning_rate": 6.617293753532561e-07, "loss": 0.485, "step": 13176 }, { "epoch": 1.68, "grad_norm": 0.9130823473166543, "learning_rate": 6.612165957251432e-07, "loss": 0.5198, "step": 13177 }, { "epoch": 1.68, "grad_norm": 0.8091334784841943, "learning_rate": 6.607040007851756e-07, "loss": 0.5207, "step": 13178 }, { "epoch": 1.68, "grad_norm": 0.8157274467444912, "learning_rate": 6.601915905551715e-07, "loss": 0.5092, "step": 13179 }, { "epoch": 1.68, "grad_norm": 0.5842665824530223, "learning_rate": 6.596793650569445e-07, "loss": 0.45, "step": 13180 }, { "epoch": 1.68, "grad_norm": 0.6643525512538696, "learning_rate": 6.591673243122959e-07, "loss": 0.4552, "step": 13181 }, { "epoch": 1.68, "grad_norm": 1.3056268286612251, "learning_rate": 6.586554683430241e-07, "loss": 0.5591, "step": 13182 }, { "epoch": 1.68, "grad_norm": 0.7058495330374922, "learning_rate": 6.581437971709154e-07, "loss": 0.4966, "step": 13183 }, { "epoch": 1.68, "grad_norm": 0.5957712235101678, "learning_rate": 6.576323108177513e-07, "loss": 0.4717, "step": 13184 }, { "epoch": 1.68, "grad_norm": 0.6032401342787694, "learning_rate": 6.571210093053027e-07, "loss": 0.4624, "step": 13185 }, { "epoch": 1.68, "grad_norm": 0.7234305689311502, "learning_rate": 6.56609892655336e-07, "loss": 0.4564, "step": 13186 }, { "epoch": 1.68, "grad_norm": 0.6712423232503021, "learning_rate": 6.560989608896051e-07, "loss": 0.4935, "step": 13187 }, { "epoch": 1.68, "grad_norm": 0.7455878778099385, "learning_rate": 6.555882140298608e-07, "loss": 0.5386, "step": 13188 }, { "epoch": 1.68, "grad_norm": 0.8059660223513304, "learning_rate": 6.550776520978435e-07, "loss": 0.5078, "step": 13189 }, { "epoch": 1.68, "grad_norm": 0.7441794947409323, "learning_rate": 6.54567275115287e-07, "loss": 0.5134, "step": 13190 }, { "epoch": 1.68, "grad_norm": 0.7134930805110236, "learning_rate": 6.540570831039145e-07, "loss": 0.453, "step": 13191 }, { "epoch": 1.68, "grad_norm": 0.584599243128046, "learning_rate": 6.535470760854457e-07, "loss": 0.4166, "step": 13192 }, { "epoch": 1.68, "grad_norm": 0.639522096752036, "learning_rate": 6.530372540815871e-07, "loss": 0.4389, "step": 13193 }, { "epoch": 1.68, "grad_norm": 0.6366401541964496, "learning_rate": 6.525276171140426e-07, "loss": 0.4634, "step": 13194 }, { "epoch": 1.68, "grad_norm": 0.6670906138950553, "learning_rate": 6.520181652045044e-07, "loss": 0.5144, "step": 13195 }, { "epoch": 1.68, "grad_norm": 0.7314718794576376, "learning_rate": 6.515088983746598e-07, "loss": 0.4833, "step": 13196 }, { "epoch": 1.68, "grad_norm": 0.586960744243251, "learning_rate": 6.509998166461845e-07, "loss": 0.4889, "step": 13197 }, { "epoch": 1.68, "grad_norm": 0.6655342129008499, "learning_rate": 6.5049092004075e-07, "loss": 0.5034, "step": 13198 }, { "epoch": 1.68, "grad_norm": 0.7117322462343038, "learning_rate": 6.499822085800178e-07, "loss": 0.5, "step": 13199 }, { "epoch": 1.68, "grad_norm": 0.6430570742367782, "learning_rate": 6.494736822856418e-07, "loss": 0.453, "step": 13200 }, { "epoch": 1.68, "grad_norm": 0.6837071214720306, "learning_rate": 6.48965341179269e-07, "loss": 0.4353, "step": 13201 }, { "epoch": 1.68, "grad_norm": 0.5829323479438171, "learning_rate": 6.484571852825389e-07, "loss": 0.4805, "step": 13202 }, { "epoch": 1.68, "grad_norm": 0.6057236705912105, "learning_rate": 6.479492146170796e-07, "loss": 0.4763, "step": 13203 }, { "epoch": 1.68, "grad_norm": 0.5988838543356654, "learning_rate": 6.474414292045162e-07, "loss": 0.404, "step": 13204 }, { "epoch": 1.68, "grad_norm": 0.5952052298100228, "learning_rate": 6.469338290664612e-07, "loss": 0.4305, "step": 13205 }, { "epoch": 1.68, "grad_norm": 0.5823993507279124, "learning_rate": 6.464264142245241e-07, "loss": 0.4621, "step": 13206 }, { "epoch": 1.68, "grad_norm": 0.6240399486407289, "learning_rate": 6.459191847003011e-07, "loss": 0.4716, "step": 13207 }, { "epoch": 1.68, "grad_norm": 0.7134892443520359, "learning_rate": 6.454121405153857e-07, "loss": 0.4634, "step": 13208 }, { "epoch": 1.68, "grad_norm": 0.6700022019679143, "learning_rate": 6.449052816913598e-07, "loss": 0.467, "step": 13209 }, { "epoch": 1.68, "grad_norm": 0.6845268488213182, "learning_rate": 6.443986082497994e-07, "loss": 0.5137, "step": 13210 }, { "epoch": 1.68, "grad_norm": 0.7105767546724752, "learning_rate": 6.438921202122711e-07, "loss": 0.4876, "step": 13211 }, { "epoch": 1.68, "grad_norm": 0.7343116141937275, "learning_rate": 6.433858176003355e-07, "loss": 0.502, "step": 13212 }, { "epoch": 1.68, "grad_norm": 0.6811266190772605, "learning_rate": 6.428797004355442e-07, "loss": 0.4503, "step": 13213 }, { "epoch": 1.68, "grad_norm": 0.6823374565690907, "learning_rate": 6.423737687394399e-07, "loss": 0.4614, "step": 13214 }, { "epoch": 1.68, "grad_norm": 0.6641954788587459, "learning_rate": 6.418680225335605e-07, "loss": 0.4713, "step": 13215 }, { "epoch": 1.68, "grad_norm": 0.595428339705211, "learning_rate": 6.413624618394321e-07, "loss": 0.5016, "step": 13216 }, { "epoch": 1.68, "grad_norm": 0.8467125264795172, "learning_rate": 6.408570866785757e-07, "loss": 0.5369, "step": 13217 }, { "epoch": 1.68, "grad_norm": 0.6501183171750672, "learning_rate": 6.403518970725026e-07, "loss": 0.4974, "step": 13218 }, { "epoch": 1.68, "grad_norm": 0.776430028242742, "learning_rate": 6.398468930427193e-07, "loss": 0.4381, "step": 13219 }, { "epoch": 1.68, "grad_norm": 0.6238258477981995, "learning_rate": 6.393420746107187e-07, "loss": 0.4205, "step": 13220 }, { "epoch": 1.68, "grad_norm": 0.5952675359235899, "learning_rate": 6.388374417979931e-07, "loss": 0.4352, "step": 13221 }, { "epoch": 1.68, "grad_norm": 0.6679658268255422, "learning_rate": 6.383329946260203e-07, "loss": 0.4673, "step": 13222 }, { "epoch": 1.68, "grad_norm": 0.7845364882999578, "learning_rate": 6.378287331162746e-07, "loss": 0.5337, "step": 13223 }, { "epoch": 1.68, "grad_norm": 0.6394595788621876, "learning_rate": 6.373246572902197e-07, "loss": 0.4704, "step": 13224 }, { "epoch": 1.68, "grad_norm": 0.5794196732338062, "learning_rate": 6.368207671693127e-07, "loss": 0.4589, "step": 13225 }, { "epoch": 1.68, "grad_norm": 0.6170022009822458, "learning_rate": 6.363170627750026e-07, "loss": 0.4375, "step": 13226 }, { "epoch": 1.69, "grad_norm": 0.6929057661493806, "learning_rate": 6.358135441287327e-07, "loss": 0.4594, "step": 13227 }, { "epoch": 1.69, "grad_norm": 0.6624533136829404, "learning_rate": 6.353102112519328e-07, "loss": 0.5236, "step": 13228 }, { "epoch": 1.69, "grad_norm": 0.7081855350645778, "learning_rate": 6.348070641660304e-07, "loss": 0.4888, "step": 13229 }, { "epoch": 1.69, "grad_norm": 0.6065173265865423, "learning_rate": 6.343041028924413e-07, "loss": 0.443, "step": 13230 }, { "epoch": 1.69, "grad_norm": 0.5696566914248682, "learning_rate": 6.338013274525767e-07, "loss": 0.4989, "step": 13231 }, { "epoch": 1.69, "grad_norm": 0.7263907376532965, "learning_rate": 6.332987378678362e-07, "loss": 0.5253, "step": 13232 }, { "epoch": 1.69, "grad_norm": 0.7559021700590759, "learning_rate": 6.327963341596161e-07, "loss": 0.4752, "step": 13233 }, { "epoch": 1.69, "grad_norm": 0.7386531658635711, "learning_rate": 6.322941163492985e-07, "loss": 0.4956, "step": 13234 }, { "epoch": 1.69, "grad_norm": 0.6506641061028858, "learning_rate": 6.317920844582647e-07, "loss": 0.4706, "step": 13235 }, { "epoch": 1.69, "grad_norm": 0.65023956919839, "learning_rate": 6.312902385078823e-07, "loss": 0.4595, "step": 13236 }, { "epoch": 1.69, "grad_norm": 0.6474243322552528, "learning_rate": 6.307885785195134e-07, "loss": 0.4863, "step": 13237 }, { "epoch": 1.69, "grad_norm": 0.7350263757451025, "learning_rate": 6.302871045145132e-07, "loss": 0.4929, "step": 13238 }, { "epoch": 1.69, "grad_norm": 0.5475064743556138, "learning_rate": 6.297858165142284e-07, "loss": 0.4125, "step": 13239 }, { "epoch": 1.69, "grad_norm": 0.6902368837710985, "learning_rate": 6.292847145399949e-07, "loss": 0.4414, "step": 13240 }, { "epoch": 1.69, "grad_norm": 0.8203295997289317, "learning_rate": 6.287837986131451e-07, "loss": 0.5278, "step": 13241 }, { "epoch": 1.69, "grad_norm": 0.7043608855382774, "learning_rate": 6.282830687550001e-07, "loss": 0.516, "step": 13242 }, { "epoch": 1.69, "grad_norm": 0.6021872889725036, "learning_rate": 6.277825249868757e-07, "loss": 0.3961, "step": 13243 }, { "epoch": 1.69, "grad_norm": 0.6815745776475196, "learning_rate": 6.272821673300766e-07, "loss": 0.4639, "step": 13244 }, { "epoch": 1.69, "grad_norm": 0.7652816387231558, "learning_rate": 6.267819958059035e-07, "loss": 0.5822, "step": 13245 }, { "epoch": 1.69, "grad_norm": 0.7853848549280263, "learning_rate": 6.262820104356443e-07, "loss": 0.4654, "step": 13246 }, { "epoch": 1.69, "grad_norm": 0.5403957272269746, "learning_rate": 6.257822112405854e-07, "loss": 0.3884, "step": 13247 }, { "epoch": 1.69, "grad_norm": 0.6287099248400456, "learning_rate": 6.252825982419985e-07, "loss": 0.4319, "step": 13248 }, { "epoch": 1.69, "grad_norm": 0.7517375097518871, "learning_rate": 6.24783171461153e-07, "loss": 0.4134, "step": 13249 }, { "epoch": 1.69, "grad_norm": 0.6529569310734861, "learning_rate": 6.242839309193039e-07, "loss": 0.4638, "step": 13250 }, { "epoch": 1.69, "grad_norm": 0.6187498733141361, "learning_rate": 6.237848766377081e-07, "loss": 0.4749, "step": 13251 }, { "epoch": 1.69, "grad_norm": 0.7459883965011078, "learning_rate": 6.232860086376041e-07, "loss": 0.5635, "step": 13252 }, { "epoch": 1.69, "grad_norm": 0.721777051325029, "learning_rate": 6.227873269402296e-07, "loss": 0.5109, "step": 13253 }, { "epoch": 1.69, "grad_norm": 0.6606544726151848, "learning_rate": 6.222888315668107e-07, "loss": 0.5309, "step": 13254 }, { "epoch": 1.69, "grad_norm": 0.5887006528238423, "learning_rate": 6.217905225385673e-07, "loss": 0.4347, "step": 13255 }, { "epoch": 1.69, "grad_norm": 0.691183812499653, "learning_rate": 6.212923998767101e-07, "loss": 0.4885, "step": 13256 }, { "epoch": 1.69, "grad_norm": 0.7808667005668951, "learning_rate": 6.207944636024443e-07, "loss": 0.5356, "step": 13257 }, { "epoch": 1.69, "grad_norm": 0.7366146545024664, "learning_rate": 6.202967137369626e-07, "loss": 0.4976, "step": 13258 }, { "epoch": 1.69, "grad_norm": 0.5807315272470808, "learning_rate": 6.197991503014555e-07, "loss": 0.4064, "step": 13259 }, { "epoch": 1.69, "grad_norm": 0.6308629208205985, "learning_rate": 6.193017733171009e-07, "loss": 0.4855, "step": 13260 }, { "epoch": 1.69, "grad_norm": 0.6729011823667962, "learning_rate": 6.188045828050715e-07, "loss": 0.432, "step": 13261 }, { "epoch": 1.69, "grad_norm": 0.648345158568187, "learning_rate": 6.183075787865301e-07, "loss": 0.4553, "step": 13262 }, { "epoch": 1.69, "grad_norm": 0.6576154073999861, "learning_rate": 6.178107612826329e-07, "loss": 0.4592, "step": 13263 }, { "epoch": 1.69, "grad_norm": 0.7053884728990202, "learning_rate": 6.17314130314528e-07, "loss": 0.4768, "step": 13264 }, { "epoch": 1.69, "grad_norm": 0.7873323063344049, "learning_rate": 6.16817685903357e-07, "loss": 0.5504, "step": 13265 }, { "epoch": 1.69, "grad_norm": 0.6369026966417681, "learning_rate": 6.163214280702496e-07, "loss": 0.4891, "step": 13266 }, { "epoch": 1.69, "grad_norm": 0.8081931100378871, "learning_rate": 6.15825356836331e-07, "loss": 0.5278, "step": 13267 }, { "epoch": 1.69, "grad_norm": 0.5819044064966594, "learning_rate": 6.153294722227165e-07, "loss": 0.423, "step": 13268 }, { "epoch": 1.69, "grad_norm": 0.7044260902221128, "learning_rate": 6.148337742505162e-07, "loss": 0.4709, "step": 13269 }, { "epoch": 1.69, "grad_norm": 0.585999404742845, "learning_rate": 6.143382629408278e-07, "loss": 0.4544, "step": 13270 }, { "epoch": 1.69, "grad_norm": 0.6569774412539903, "learning_rate": 6.138429383147465e-07, "loss": 0.4039, "step": 13271 }, { "epoch": 1.69, "grad_norm": 0.6415569096869321, "learning_rate": 6.133478003933535e-07, "loss": 0.494, "step": 13272 }, { "epoch": 1.69, "grad_norm": 0.8093899064119583, "learning_rate": 6.128528491977281e-07, "loss": 0.4681, "step": 13273 }, { "epoch": 1.69, "grad_norm": 0.6270334986494188, "learning_rate": 6.123580847489368e-07, "loss": 0.4958, "step": 13274 }, { "epoch": 1.69, "grad_norm": 0.7641585523605698, "learning_rate": 6.118635070680407e-07, "loss": 0.486, "step": 13275 }, { "epoch": 1.69, "grad_norm": 1.0009591721374413, "learning_rate": 6.113691161760938e-07, "loss": 0.5966, "step": 13276 }, { "epoch": 1.69, "grad_norm": 0.7618903329323423, "learning_rate": 6.108749120941387e-07, "loss": 0.4587, "step": 13277 }, { "epoch": 1.69, "grad_norm": 0.6045797966573727, "learning_rate": 6.103808948432138e-07, "loss": 0.4876, "step": 13278 }, { "epoch": 1.69, "grad_norm": 0.6601522697096782, "learning_rate": 6.098870644443461e-07, "loss": 0.4364, "step": 13279 }, { "epoch": 1.69, "grad_norm": 0.9147808240281305, "learning_rate": 6.093934209185581e-07, "loss": 0.4795, "step": 13280 }, { "epoch": 1.69, "grad_norm": 0.7181725551150726, "learning_rate": 6.088999642868609e-07, "loss": 0.4653, "step": 13281 }, { "epoch": 1.69, "grad_norm": 0.642393746637176, "learning_rate": 6.08406694570261e-07, "loss": 0.4493, "step": 13282 }, { "epoch": 1.69, "grad_norm": 0.6493198783828626, "learning_rate": 6.079136117897538e-07, "loss": 0.5088, "step": 13283 }, { "epoch": 1.69, "grad_norm": 0.7044739356368885, "learning_rate": 6.074207159663303e-07, "loss": 0.5019, "step": 13284 }, { "epoch": 1.69, "grad_norm": 0.6534631448482686, "learning_rate": 6.069280071209688e-07, "loss": 0.4933, "step": 13285 }, { "epoch": 1.69, "grad_norm": 0.68704142612423, "learning_rate": 6.06435485274644e-07, "loss": 0.5144, "step": 13286 }, { "epoch": 1.69, "grad_norm": 0.7947026802709749, "learning_rate": 6.059431504483204e-07, "loss": 0.4211, "step": 13287 }, { "epoch": 1.69, "grad_norm": 0.6684386382395396, "learning_rate": 6.054510026629573e-07, "loss": 0.491, "step": 13288 }, { "epoch": 1.69, "grad_norm": 0.7114349300135769, "learning_rate": 6.049590419395001e-07, "loss": 0.4834, "step": 13289 }, { "epoch": 1.69, "grad_norm": 0.6570781394762032, "learning_rate": 6.04467268298894e-07, "loss": 0.452, "step": 13290 }, { "epoch": 1.69, "grad_norm": 0.6001633510449826, "learning_rate": 6.039756817620685e-07, "loss": 0.5148, "step": 13291 }, { "epoch": 1.69, "grad_norm": 0.5439518892115852, "learning_rate": 6.034842823499515e-07, "loss": 0.4186, "step": 13292 }, { "epoch": 1.69, "grad_norm": 0.6736206923557385, "learning_rate": 6.029930700834585e-07, "loss": 0.4907, "step": 13293 }, { "epoch": 1.69, "grad_norm": 0.7762451802558228, "learning_rate": 6.025020449835006e-07, "loss": 0.5114, "step": 13294 }, { "epoch": 1.69, "grad_norm": 0.6870229448599613, "learning_rate": 6.020112070709771e-07, "loss": 0.5233, "step": 13295 }, { "epoch": 1.69, "grad_norm": 0.7524910158126882, "learning_rate": 6.015205563667842e-07, "loss": 0.4994, "step": 13296 }, { "epoch": 1.69, "grad_norm": 0.5897375675961104, "learning_rate": 6.010300928918039e-07, "loss": 0.4533, "step": 13297 }, { "epoch": 1.69, "grad_norm": 0.638068976499653, "learning_rate": 6.005398166669168e-07, "loss": 0.5053, "step": 13298 }, { "epoch": 1.69, "grad_norm": 0.6123596559950154, "learning_rate": 6.000497277129891e-07, "loss": 0.4458, "step": 13299 }, { "epoch": 1.69, "grad_norm": 0.6104369110696504, "learning_rate": 5.995598260508861e-07, "loss": 0.4837, "step": 13300 }, { "epoch": 1.69, "grad_norm": 0.7062454274679989, "learning_rate": 5.990701117014591e-07, "loss": 0.4714, "step": 13301 }, { "epoch": 1.69, "grad_norm": 0.7947684352555169, "learning_rate": 5.985805846855541e-07, "loss": 0.4692, "step": 13302 }, { "epoch": 1.69, "grad_norm": 0.6784244547859919, "learning_rate": 5.980912450240083e-07, "loss": 0.4805, "step": 13303 }, { "epoch": 1.69, "grad_norm": 0.7891188370150046, "learning_rate": 5.97602092737653e-07, "loss": 0.4908, "step": 13304 }, { "epoch": 1.7, "grad_norm": 0.7555822265552243, "learning_rate": 5.971131278473069e-07, "loss": 0.5059, "step": 13305 }, { "epoch": 1.7, "grad_norm": 0.5927466264506613, "learning_rate": 5.966243503737867e-07, "loss": 0.4589, "step": 13306 }, { "epoch": 1.7, "grad_norm": 0.8287817905148487, "learning_rate": 5.961357603378953e-07, "loss": 0.5498, "step": 13307 }, { "epoch": 1.7, "grad_norm": 0.8167642007908147, "learning_rate": 5.956473577604333e-07, "loss": 0.5755, "step": 13308 }, { "epoch": 1.7, "grad_norm": 0.7228501063943434, "learning_rate": 5.951591426621878e-07, "loss": 0.5053, "step": 13309 }, { "epoch": 1.7, "grad_norm": 0.6332786892821344, "learning_rate": 5.946711150639428e-07, "loss": 0.5005, "step": 13310 }, { "epoch": 1.7, "grad_norm": 0.8484064613612927, "learning_rate": 5.941832749864701e-07, "loss": 0.5165, "step": 13311 }, { "epoch": 1.7, "grad_norm": 0.6341756910406522, "learning_rate": 5.936956224505363e-07, "loss": 0.4541, "step": 13312 }, { "epoch": 1.7, "grad_norm": 0.7458581830943323, "learning_rate": 5.932081574768994e-07, "loss": 0.4969, "step": 13313 }, { "epoch": 1.7, "grad_norm": 0.5873594478360663, "learning_rate": 5.927208800863099e-07, "loss": 0.4219, "step": 13314 }, { "epoch": 1.7, "grad_norm": 0.6402812143508356, "learning_rate": 5.922337902995084e-07, "loss": 0.4585, "step": 13315 }, { "epoch": 1.7, "grad_norm": 0.7405776272977772, "learning_rate": 5.917468881372295e-07, "loss": 0.4963, "step": 13316 }, { "epoch": 1.7, "grad_norm": 0.8122006172130778, "learning_rate": 5.912601736201984e-07, "loss": 0.5663, "step": 13317 }, { "epoch": 1.7, "grad_norm": 0.7467276538643776, "learning_rate": 5.907736467691344e-07, "loss": 0.5149, "step": 13318 }, { "epoch": 1.7, "grad_norm": 0.6098748884889496, "learning_rate": 5.90287307604745e-07, "loss": 0.4437, "step": 13319 }, { "epoch": 1.7, "grad_norm": 0.7356370919984131, "learning_rate": 5.898011561477346e-07, "loss": 0.5015, "step": 13320 }, { "epoch": 1.7, "grad_norm": 0.9281278736830941, "learning_rate": 5.893151924187951e-07, "loss": 0.5438, "step": 13321 }, { "epoch": 1.7, "grad_norm": 0.8011992222367917, "learning_rate": 5.888294164386144e-07, "loss": 0.5443, "step": 13322 }, { "epoch": 1.7, "grad_norm": 0.6080266298484827, "learning_rate": 5.883438282278686e-07, "loss": 0.4802, "step": 13323 }, { "epoch": 1.7, "grad_norm": 0.787902629401806, "learning_rate": 5.87858427807228e-07, "loss": 0.4806, "step": 13324 }, { "epoch": 1.7, "grad_norm": 0.7878562623579041, "learning_rate": 5.873732151973549e-07, "loss": 0.5472, "step": 13325 }, { "epoch": 1.7, "grad_norm": 0.6942491657141394, "learning_rate": 5.868881904189045e-07, "loss": 0.5014, "step": 13326 }, { "epoch": 1.7, "grad_norm": 0.7992200109449441, "learning_rate": 5.864033534925201e-07, "loss": 0.466, "step": 13327 }, { "epoch": 1.7, "grad_norm": 0.7474421365598086, "learning_rate": 5.859187044388425e-07, "loss": 0.4675, "step": 13328 }, { "epoch": 1.7, "grad_norm": 0.6155005515016349, "learning_rate": 5.854342432784993e-07, "loss": 0.4642, "step": 13329 }, { "epoch": 1.7, "grad_norm": 0.7288813571234856, "learning_rate": 5.849499700321143e-07, "loss": 0.4846, "step": 13330 }, { "epoch": 1.7, "grad_norm": 0.6261665626183406, "learning_rate": 5.844658847203e-07, "loss": 0.4524, "step": 13331 }, { "epoch": 1.7, "grad_norm": 0.6185691551217555, "learning_rate": 5.839819873636631e-07, "loss": 0.4918, "step": 13332 }, { "epoch": 1.7, "grad_norm": 0.5540298799680585, "learning_rate": 5.834982779828013e-07, "loss": 0.4486, "step": 13333 }, { "epoch": 1.7, "grad_norm": 0.5710911493703007, "learning_rate": 5.830147565983052e-07, "loss": 0.4193, "step": 13334 }, { "epoch": 1.7, "grad_norm": 0.6730385946567293, "learning_rate": 5.825314232307555e-07, "loss": 0.5031, "step": 13335 }, { "epoch": 1.7, "grad_norm": 0.7496955140957168, "learning_rate": 5.820482779007264e-07, "loss": 0.5275, "step": 13336 }, { "epoch": 1.7, "grad_norm": 0.659206345228007, "learning_rate": 5.815653206287858e-07, "loss": 0.4957, "step": 13337 }, { "epoch": 1.7, "grad_norm": 0.5675387457762078, "learning_rate": 5.81082551435489e-07, "loss": 0.4991, "step": 13338 }, { "epoch": 1.7, "grad_norm": 0.6798924098333802, "learning_rate": 5.805999703413884e-07, "loss": 0.4767, "step": 13339 }, { "epoch": 1.7, "grad_norm": 0.6403622879255059, "learning_rate": 5.801175773670237e-07, "loss": 0.5122, "step": 13340 }, { "epoch": 1.7, "grad_norm": 0.7715886174956222, "learning_rate": 5.796353725329307e-07, "loss": 0.4839, "step": 13341 }, { "epoch": 1.7, "grad_norm": 0.7664182327975534, "learning_rate": 5.791533558596335e-07, "loss": 0.4755, "step": 13342 }, { "epoch": 1.7, "grad_norm": 0.6135913237584542, "learning_rate": 5.786715273676519e-07, "loss": 0.4428, "step": 13343 }, { "epoch": 1.7, "grad_norm": 0.7228449578669016, "learning_rate": 5.781898870774932e-07, "loss": 0.4937, "step": 13344 }, { "epoch": 1.7, "grad_norm": 0.5763649804346269, "learning_rate": 5.777084350096629e-07, "loss": 0.4885, "step": 13345 }, { "epoch": 1.7, "grad_norm": 0.732568785150028, "learning_rate": 5.772271711846511e-07, "loss": 0.4968, "step": 13346 }, { "epoch": 1.7, "grad_norm": 0.6180875622425891, "learning_rate": 5.76746095622947e-07, "loss": 0.3946, "step": 13347 }, { "epoch": 1.7, "grad_norm": 0.6258451309430735, "learning_rate": 5.762652083450259e-07, "loss": 0.515, "step": 13348 }, { "epoch": 1.7, "grad_norm": 0.8084715646563958, "learning_rate": 5.757845093713582e-07, "loss": 0.5029, "step": 13349 }, { "epoch": 1.7, "grad_norm": 0.6150932145352855, "learning_rate": 5.753039987224068e-07, "loss": 0.5393, "step": 13350 }, { "epoch": 1.7, "grad_norm": 0.7008868264947624, "learning_rate": 5.748236764186255e-07, "loss": 0.4611, "step": 13351 }, { "epoch": 1.7, "grad_norm": 0.60279522015059, "learning_rate": 5.743435424804583e-07, "loss": 0.4579, "step": 13352 }, { "epoch": 1.7, "grad_norm": 0.6963196159816807, "learning_rate": 5.738635969283451e-07, "loss": 0.4672, "step": 13353 }, { "epoch": 1.7, "grad_norm": 0.7328306019994018, "learning_rate": 5.73383839782714e-07, "loss": 0.5044, "step": 13354 }, { "epoch": 1.7, "grad_norm": 0.7667410947477192, "learning_rate": 5.729042710639881e-07, "loss": 0.5189, "step": 13355 }, { "epoch": 1.7, "grad_norm": 0.658590495729361, "learning_rate": 5.7242489079258e-07, "loss": 0.386, "step": 13356 }, { "epoch": 1.7, "grad_norm": 0.6428190717867096, "learning_rate": 5.719456989888961e-07, "loss": 0.4937, "step": 13357 }, { "epoch": 1.7, "grad_norm": 0.5774681174488103, "learning_rate": 5.714666956733333e-07, "loss": 0.4482, "step": 13358 }, { "epoch": 1.7, "grad_norm": 0.6598026536557327, "learning_rate": 5.709878808662822e-07, "loss": 0.5067, "step": 13359 }, { "epoch": 1.7, "grad_norm": 0.6981788910088081, "learning_rate": 5.70509254588123e-07, "loss": 0.5657, "step": 13360 }, { "epoch": 1.7, "grad_norm": 0.7683745823331873, "learning_rate": 5.700308168592306e-07, "loss": 0.5026, "step": 13361 }, { "epoch": 1.7, "grad_norm": 0.6426707910006034, "learning_rate": 5.695525676999697e-07, "loss": 0.4446, "step": 13362 }, { "epoch": 1.7, "grad_norm": 0.6754098392622393, "learning_rate": 5.690745071306997e-07, "loss": 0.4413, "step": 13363 }, { "epoch": 1.7, "grad_norm": 0.7104544223046783, "learning_rate": 5.685966351717675e-07, "loss": 0.487, "step": 13364 }, { "epoch": 1.7, "grad_norm": 0.6000844843818912, "learning_rate": 5.681189518435171e-07, "loss": 0.4234, "step": 13365 }, { "epoch": 1.7, "grad_norm": 0.6155763840595415, "learning_rate": 5.676414571662791e-07, "loss": 0.4699, "step": 13366 }, { "epoch": 1.7, "grad_norm": 0.7834183283721327, "learning_rate": 5.67164151160382e-07, "loss": 0.4986, "step": 13367 }, { "epoch": 1.7, "grad_norm": 0.6745607081372177, "learning_rate": 5.666870338461406e-07, "loss": 0.448, "step": 13368 }, { "epoch": 1.7, "grad_norm": 0.636315886245817, "learning_rate": 5.662101052438657e-07, "loss": 0.4719, "step": 13369 }, { "epoch": 1.7, "grad_norm": 0.6122587366787632, "learning_rate": 5.657333653738578e-07, "loss": 0.4148, "step": 13370 }, { "epoch": 1.7, "grad_norm": 0.5906616751333879, "learning_rate": 5.652568142564119e-07, "loss": 0.4381, "step": 13371 }, { "epoch": 1.7, "grad_norm": 0.7337194285741252, "learning_rate": 5.647804519118105e-07, "loss": 0.4952, "step": 13372 }, { "epoch": 1.7, "grad_norm": 1.0380785415037816, "learning_rate": 5.643042783603336e-07, "loss": 0.5074, "step": 13373 }, { "epoch": 1.7, "grad_norm": 0.7069060541007551, "learning_rate": 5.638282936222466e-07, "loss": 0.515, "step": 13374 }, { "epoch": 1.7, "grad_norm": 0.7233592627731629, "learning_rate": 5.633524977178157e-07, "loss": 0.4919, "step": 13375 }, { "epoch": 1.7, "grad_norm": 0.6595953071861914, "learning_rate": 5.628768906672904e-07, "loss": 0.4775, "step": 13376 }, { "epoch": 1.7, "grad_norm": 0.7425855243143294, "learning_rate": 5.624014724909177e-07, "loss": 0.5063, "step": 13377 }, { "epoch": 1.7, "grad_norm": 0.8330808705239865, "learning_rate": 5.619262432089328e-07, "loss": 0.5214, "step": 13378 }, { "epoch": 1.7, "grad_norm": 0.5847542424331638, "learning_rate": 5.61451202841567e-07, "loss": 0.4732, "step": 13379 }, { "epoch": 1.7, "grad_norm": 0.8262167949737804, "learning_rate": 5.609763514090389e-07, "loss": 0.5227, "step": 13380 }, { "epoch": 1.7, "grad_norm": 0.8392914473344257, "learning_rate": 5.605016889315629e-07, "loss": 0.6132, "step": 13381 }, { "epoch": 1.7, "grad_norm": 1.37797183815575, "learning_rate": 5.600272154293429e-07, "loss": 0.5412, "step": 13382 }, { "epoch": 1.7, "grad_norm": 0.8037076800990429, "learning_rate": 5.595529309225767e-07, "loss": 0.4826, "step": 13383 }, { "epoch": 1.71, "grad_norm": 0.7060097193971053, "learning_rate": 5.59078835431452e-07, "loss": 0.4623, "step": 13384 }, { "epoch": 1.71, "grad_norm": 0.7708560791794763, "learning_rate": 5.586049289761514e-07, "loss": 0.5203, "step": 13385 }, { "epoch": 1.71, "grad_norm": 0.5958759761624182, "learning_rate": 5.581312115768439e-07, "loss": 0.4413, "step": 13386 }, { "epoch": 1.71, "grad_norm": 0.6060154045256066, "learning_rate": 5.576576832536984e-07, "loss": 0.4505, "step": 13387 }, { "epoch": 1.71, "grad_norm": 0.6499689306922725, "learning_rate": 5.571843440268687e-07, "loss": 0.4868, "step": 13388 }, { "epoch": 1.71, "grad_norm": 0.6826839810545408, "learning_rate": 5.567111939165054e-07, "loss": 0.49, "step": 13389 }, { "epoch": 1.71, "grad_norm": 0.6090453535338727, "learning_rate": 5.562382329427468e-07, "loss": 0.4107, "step": 13390 }, { "epoch": 1.71, "grad_norm": 0.9709415044505403, "learning_rate": 5.557654611257273e-07, "loss": 0.4745, "step": 13391 }, { "epoch": 1.71, "grad_norm": 0.709584640049621, "learning_rate": 5.552928784855699e-07, "loss": 0.4116, "step": 13392 }, { "epoch": 1.71, "grad_norm": 0.6459074813515838, "learning_rate": 5.548204850423916e-07, "loss": 0.4036, "step": 13393 }, { "epoch": 1.71, "grad_norm": 0.5541100532626158, "learning_rate": 5.543482808163003e-07, "loss": 0.3929, "step": 13394 }, { "epoch": 1.71, "grad_norm": 0.7426584054019043, "learning_rate": 5.538762658273966e-07, "loss": 0.4776, "step": 13395 }, { "epoch": 1.71, "grad_norm": 0.6899576880867103, "learning_rate": 5.534044400957722e-07, "loss": 0.464, "step": 13396 }, { "epoch": 1.71, "grad_norm": 0.6575788837814819, "learning_rate": 5.529328036415122e-07, "loss": 0.4282, "step": 13397 }, { "epoch": 1.71, "grad_norm": 0.6033789623175871, "learning_rate": 5.524613564846909e-07, "loss": 0.4667, "step": 13398 }, { "epoch": 1.71, "grad_norm": 0.5526282835715132, "learning_rate": 5.519900986453774e-07, "loss": 0.4426, "step": 13399 }, { "epoch": 1.71, "grad_norm": 0.6687017196398288, "learning_rate": 5.515190301436324e-07, "loss": 0.5118, "step": 13400 }, { "epoch": 1.71, "grad_norm": 0.6574492968600151, "learning_rate": 5.510481509995064e-07, "loss": 0.4616, "step": 13401 }, { "epoch": 1.71, "grad_norm": 0.7946476720998246, "learning_rate": 5.505774612330444e-07, "loss": 0.5373, "step": 13402 }, { "epoch": 1.71, "grad_norm": 1.5274641476069164, "learning_rate": 5.501069608642812e-07, "loss": 0.5097, "step": 13403 }, { "epoch": 1.71, "grad_norm": 0.6314147547383917, "learning_rate": 5.496366499132449e-07, "loss": 0.4351, "step": 13404 }, { "epoch": 1.71, "grad_norm": 0.6232649286118136, "learning_rate": 5.491665283999548e-07, "loss": 0.4306, "step": 13405 }, { "epoch": 1.71, "grad_norm": 0.6340596528539817, "learning_rate": 5.486965963444235e-07, "loss": 0.5248, "step": 13406 }, { "epoch": 1.71, "grad_norm": 0.8153257376216062, "learning_rate": 5.48226853766653e-07, "loss": 0.4958, "step": 13407 }, { "epoch": 1.71, "grad_norm": 0.7200693584824995, "learning_rate": 5.477573006866399e-07, "loss": 0.4721, "step": 13408 }, { "epoch": 1.71, "grad_norm": 0.7263811299201134, "learning_rate": 5.472879371243706e-07, "loss": 0.4616, "step": 13409 }, { "epoch": 1.71, "grad_norm": 0.7440678735966529, "learning_rate": 5.46818763099825e-07, "loss": 0.4414, "step": 13410 }, { "epoch": 1.71, "grad_norm": 0.5707027359573893, "learning_rate": 5.463497786329741e-07, "loss": 0.4392, "step": 13411 }, { "epoch": 1.71, "grad_norm": 0.5886229635191678, "learning_rate": 5.458809837437829e-07, "loss": 0.4588, "step": 13412 }, { "epoch": 1.71, "grad_norm": 0.7177190653947748, "learning_rate": 5.45412378452203e-07, "loss": 0.4904, "step": 13413 }, { "epoch": 1.71, "grad_norm": 0.6554290566859238, "learning_rate": 5.449439627781849e-07, "loss": 0.4832, "step": 13414 }, { "epoch": 1.71, "grad_norm": 0.6333449418235072, "learning_rate": 5.444757367416653e-07, "loss": 0.4823, "step": 13415 }, { "epoch": 1.71, "grad_norm": 0.6243784774277185, "learning_rate": 5.44007700362576e-07, "loss": 0.4951, "step": 13416 }, { "epoch": 1.71, "grad_norm": 0.683961817196155, "learning_rate": 5.435398536608388e-07, "loss": 0.5331, "step": 13417 }, { "epoch": 1.71, "grad_norm": 0.7203665886537614, "learning_rate": 5.430721966563707e-07, "loss": 0.4853, "step": 13418 }, { "epoch": 1.71, "grad_norm": 0.6767839737968374, "learning_rate": 5.426047293690756e-07, "loss": 0.4442, "step": 13419 }, { "epoch": 1.71, "grad_norm": 0.6016902573212886, "learning_rate": 5.421374518188544e-07, "loss": 0.475, "step": 13420 }, { "epoch": 1.71, "grad_norm": 0.802499135984858, "learning_rate": 5.416703640255949e-07, "loss": 0.5657, "step": 13421 }, { "epoch": 1.71, "grad_norm": 0.7460569730911966, "learning_rate": 5.412034660091831e-07, "loss": 0.5204, "step": 13422 }, { "epoch": 1.71, "grad_norm": 0.7577995300535245, "learning_rate": 5.40736757789489e-07, "loss": 0.4974, "step": 13423 }, { "epoch": 1.71, "grad_norm": 0.8063098793401025, "learning_rate": 5.402702393863835e-07, "loss": 0.5404, "step": 13424 }, { "epoch": 1.71, "grad_norm": 0.7952471416838907, "learning_rate": 5.398039108197217e-07, "loss": 0.5854, "step": 13425 }, { "epoch": 1.71, "grad_norm": 0.7447948919776203, "learning_rate": 5.393377721093556e-07, "loss": 0.5466, "step": 13426 }, { "epoch": 1.71, "grad_norm": 0.6546642688847119, "learning_rate": 5.388718232751255e-07, "loss": 0.4827, "step": 13427 }, { "epoch": 1.71, "grad_norm": 0.6659201426078573, "learning_rate": 5.384060643368671e-07, "loss": 0.4961, "step": 13428 }, { "epoch": 1.71, "grad_norm": 0.646671682394813, "learning_rate": 5.37940495314404e-07, "loss": 0.4811, "step": 13429 }, { "epoch": 1.71, "grad_norm": 0.686857166637779, "learning_rate": 5.374751162275566e-07, "loss": 0.4543, "step": 13430 }, { "epoch": 1.71, "grad_norm": 0.640855090895263, "learning_rate": 5.370099270961326e-07, "loss": 0.4719, "step": 13431 }, { "epoch": 1.71, "grad_norm": 0.7120349867353833, "learning_rate": 5.365449279399349e-07, "loss": 0.5417, "step": 13432 }, { "epoch": 1.71, "grad_norm": 0.840011307376331, "learning_rate": 5.360801187787557e-07, "loss": 0.5019, "step": 13433 }, { "epoch": 1.71, "grad_norm": 0.7276677411086282, "learning_rate": 5.35615499632382e-07, "loss": 0.4823, "step": 13434 }, { "epoch": 1.71, "grad_norm": 0.592391850235361, "learning_rate": 5.351510705205892e-07, "loss": 0.4389, "step": 13435 }, { "epoch": 1.71, "grad_norm": 0.5790892551111707, "learning_rate": 5.346868314631481e-07, "loss": 0.4558, "step": 13436 }, { "epoch": 1.71, "grad_norm": 0.639531931678247, "learning_rate": 5.34222782479819e-07, "loss": 0.4489, "step": 13437 }, { "epoch": 1.71, "grad_norm": 0.574153119719629, "learning_rate": 5.337589235903567e-07, "loss": 0.4233, "step": 13438 }, { "epoch": 1.71, "grad_norm": 0.5397987130314181, "learning_rate": 5.332952548145043e-07, "loss": 0.4018, "step": 13439 }, { "epoch": 1.71, "grad_norm": 0.6445801618259048, "learning_rate": 5.328317761719998e-07, "loss": 0.4294, "step": 13440 }, { "epoch": 1.71, "grad_norm": 0.7697755849633073, "learning_rate": 5.323684876825708e-07, "loss": 0.5404, "step": 13441 }, { "epoch": 1.71, "grad_norm": 0.778580611108743, "learning_rate": 5.319053893659398e-07, "loss": 0.5275, "step": 13442 }, { "epoch": 1.71, "grad_norm": 0.7032036943095256, "learning_rate": 5.314424812418179e-07, "loss": 0.4834, "step": 13443 }, { "epoch": 1.71, "grad_norm": 0.7066091124372516, "learning_rate": 5.309797633299102e-07, "loss": 0.4549, "step": 13444 }, { "epoch": 1.71, "grad_norm": 0.7906771861495535, "learning_rate": 5.305172356499128e-07, "loss": 0.5457, "step": 13445 }, { "epoch": 1.71, "grad_norm": 0.8050563191892861, "learning_rate": 5.300548982215148e-07, "loss": 0.5394, "step": 13446 }, { "epoch": 1.71, "grad_norm": 0.6745136768081252, "learning_rate": 5.29592751064395e-07, "loss": 0.4269, "step": 13447 }, { "epoch": 1.71, "grad_norm": 0.593199466259227, "learning_rate": 5.29130794198226e-07, "loss": 0.439, "step": 13448 }, { "epoch": 1.71, "grad_norm": 0.6777181309354753, "learning_rate": 5.286690276426726e-07, "loss": 0.4373, "step": 13449 }, { "epoch": 1.71, "grad_norm": 0.6255065557710193, "learning_rate": 5.282074514173918e-07, "loss": 0.4888, "step": 13450 }, { "epoch": 1.71, "grad_norm": 0.7276607714110438, "learning_rate": 5.277460655420285e-07, "loss": 0.5783, "step": 13451 }, { "epoch": 1.71, "grad_norm": 0.7327755072041363, "learning_rate": 5.272848700362248e-07, "loss": 0.485, "step": 13452 }, { "epoch": 1.71, "grad_norm": 0.5411547755199434, "learning_rate": 5.268238649196106e-07, "loss": 0.3933, "step": 13453 }, { "epoch": 1.71, "grad_norm": 0.6821327191943527, "learning_rate": 5.263630502118111e-07, "loss": 0.4649, "step": 13454 }, { "epoch": 1.71, "grad_norm": 0.5974217881553601, "learning_rate": 5.259024259324403e-07, "loss": 0.4475, "step": 13455 }, { "epoch": 1.71, "grad_norm": 0.7235750000145602, "learning_rate": 5.254419921011067e-07, "loss": 0.5377, "step": 13456 }, { "epoch": 1.71, "grad_norm": 1.259906786986836, "learning_rate": 5.249817487374076e-07, "loss": 0.4955, "step": 13457 }, { "epoch": 1.71, "grad_norm": 0.6399285789575561, "learning_rate": 5.245216958609367e-07, "loss": 0.4802, "step": 13458 }, { "epoch": 1.71, "grad_norm": 0.6505134880383475, "learning_rate": 5.24061833491275e-07, "loss": 0.4558, "step": 13459 }, { "epoch": 1.71, "grad_norm": 0.7943712126639234, "learning_rate": 5.236021616479975e-07, "loss": 0.5125, "step": 13460 }, { "epoch": 1.71, "grad_norm": 1.4439097511278647, "learning_rate": 5.231426803506723e-07, "loss": 0.543, "step": 13461 }, { "epoch": 1.72, "grad_norm": 0.799506855537563, "learning_rate": 5.226833896188565e-07, "loss": 0.4945, "step": 13462 }, { "epoch": 1.72, "grad_norm": 2.736684636902891, "learning_rate": 5.222242894721025e-07, "loss": 0.4693, "step": 13463 }, { "epoch": 1.72, "grad_norm": 0.9475922498463931, "learning_rate": 5.217653799299499e-07, "loss": 0.4918, "step": 13464 }, { "epoch": 1.72, "grad_norm": 0.6439909087377487, "learning_rate": 5.213066610119366e-07, "loss": 0.467, "step": 13465 }, { "epoch": 1.72, "grad_norm": 0.7845230609692545, "learning_rate": 5.208481327375852e-07, "loss": 0.5271, "step": 13466 }, { "epoch": 1.72, "grad_norm": 0.8115798053438208, "learning_rate": 5.20389795126417e-07, "loss": 0.5335, "step": 13467 }, { "epoch": 1.72, "grad_norm": 0.8578298257396454, "learning_rate": 5.199316481979388e-07, "loss": 0.5386, "step": 13468 }, { "epoch": 1.72, "grad_norm": 0.7948247593581265, "learning_rate": 5.194736919716553e-07, "loss": 0.5286, "step": 13469 }, { "epoch": 1.72, "grad_norm": 0.6066679443004858, "learning_rate": 5.190159264670585e-07, "loss": 0.4694, "step": 13470 }, { "epoch": 1.72, "grad_norm": 0.6414319330770931, "learning_rate": 5.185583517036341e-07, "loss": 0.4901, "step": 13471 }, { "epoch": 1.72, "grad_norm": 0.67294953009147, "learning_rate": 5.181009677008609e-07, "loss": 0.496, "step": 13472 }, { "epoch": 1.72, "grad_norm": 0.7603777738843328, "learning_rate": 5.176437744782071e-07, "loss": 0.5338, "step": 13473 }, { "epoch": 1.72, "grad_norm": 0.8254172490760958, "learning_rate": 5.171867720551332e-07, "loss": 0.4979, "step": 13474 }, { "epoch": 1.72, "grad_norm": 0.6346674624956858, "learning_rate": 5.16729960451095e-07, "loss": 0.4553, "step": 13475 }, { "epoch": 1.72, "grad_norm": 0.6143406327680423, "learning_rate": 5.162733396855352e-07, "loss": 0.4757, "step": 13476 }, { "epoch": 1.72, "grad_norm": 0.8868476722643397, "learning_rate": 5.15816909777892e-07, "loss": 0.5238, "step": 13477 }, { "epoch": 1.72, "grad_norm": 0.6027123674190451, "learning_rate": 5.153606707475928e-07, "loss": 0.4633, "step": 13478 }, { "epoch": 1.72, "grad_norm": 0.6275408433129489, "learning_rate": 5.149046226140597e-07, "loss": 0.4254, "step": 13479 }, { "epoch": 1.72, "grad_norm": 0.579274325214195, "learning_rate": 5.144487653967034e-07, "loss": 0.4188, "step": 13480 }, { "epoch": 1.72, "grad_norm": 0.7334438457264898, "learning_rate": 5.139930991149305e-07, "loss": 0.509, "step": 13481 }, { "epoch": 1.72, "grad_norm": 0.7454055048413789, "learning_rate": 5.135376237881351e-07, "loss": 0.4868, "step": 13482 }, { "epoch": 1.72, "grad_norm": 0.6331981607311464, "learning_rate": 5.130823394357076e-07, "loss": 0.426, "step": 13483 }, { "epoch": 1.72, "grad_norm": 0.7226006228562415, "learning_rate": 5.126272460770259e-07, "loss": 0.4879, "step": 13484 }, { "epoch": 1.72, "grad_norm": 0.62573136086021, "learning_rate": 5.121723437314624e-07, "loss": 0.4858, "step": 13485 }, { "epoch": 1.72, "grad_norm": 1.0045850742193194, "learning_rate": 5.117176324183815e-07, "loss": 0.5121, "step": 13486 }, { "epoch": 1.72, "grad_norm": 0.6638344789425039, "learning_rate": 5.112631121571388e-07, "loss": 0.4225, "step": 13487 }, { "epoch": 1.72, "grad_norm": 0.6082734494787309, "learning_rate": 5.108087829670811e-07, "loss": 0.434, "step": 13488 }, { "epoch": 1.72, "grad_norm": 0.6367235384582767, "learning_rate": 5.10354644867549e-07, "loss": 0.4535, "step": 13489 }, { "epoch": 1.72, "grad_norm": 0.6522595937119088, "learning_rate": 5.09900697877872e-07, "loss": 0.4869, "step": 13490 }, { "epoch": 1.72, "grad_norm": 0.7877913073177728, "learning_rate": 5.094469420173747e-07, "loss": 0.5572, "step": 13491 }, { "epoch": 1.72, "grad_norm": 0.7264053590693375, "learning_rate": 5.089933773053701e-07, "loss": 0.4906, "step": 13492 }, { "epoch": 1.72, "grad_norm": 0.6496694524268284, "learning_rate": 5.085400037611677e-07, "loss": 0.4857, "step": 13493 }, { "epoch": 1.72, "grad_norm": 0.8183997118751484, "learning_rate": 5.080868214040635e-07, "loss": 0.5826, "step": 13494 }, { "epoch": 1.72, "grad_norm": 0.7857623147630399, "learning_rate": 5.076338302533501e-07, "loss": 0.516, "step": 13495 }, { "epoch": 1.72, "grad_norm": 0.6294633985731893, "learning_rate": 5.071810303283082e-07, "loss": 0.4374, "step": 13496 }, { "epoch": 1.72, "grad_norm": 0.653255956901337, "learning_rate": 5.067284216482138e-07, "loss": 0.5019, "step": 13497 }, { "epoch": 1.72, "grad_norm": 0.6859327814773434, "learning_rate": 5.0627600423233e-07, "loss": 0.4589, "step": 13498 }, { "epoch": 1.72, "grad_norm": 0.5763504152493674, "learning_rate": 5.058237780999187e-07, "loss": 0.3936, "step": 13499 }, { "epoch": 1.72, "grad_norm": 0.5695098473497525, "learning_rate": 5.053717432702271e-07, "loss": 0.4191, "step": 13500 }, { "epoch": 1.72, "grad_norm": 1.4055190549042542, "learning_rate": 5.049198997624982e-07, "loss": 0.4572, "step": 13501 }, { "epoch": 1.72, "grad_norm": 0.7630383485124494, "learning_rate": 5.044682475959645e-07, "loss": 0.4507, "step": 13502 }, { "epoch": 1.72, "grad_norm": 0.5686152642053554, "learning_rate": 5.040167867898522e-07, "loss": 0.421, "step": 13503 }, { "epoch": 1.72, "grad_norm": 0.712684044349212, "learning_rate": 5.035655173633774e-07, "loss": 0.4771, "step": 13504 }, { "epoch": 1.72, "grad_norm": 0.6816172683455951, "learning_rate": 5.031144393357512e-07, "loss": 0.4485, "step": 13505 }, { "epoch": 1.72, "grad_norm": 0.7496102324386802, "learning_rate": 5.026635527261719e-07, "loss": 0.4733, "step": 13506 }, { "epoch": 1.72, "grad_norm": 0.6357732452616848, "learning_rate": 5.022128575538354e-07, "loss": 0.47, "step": 13507 }, { "epoch": 1.72, "grad_norm": 0.6000843601349439, "learning_rate": 5.017623538379235e-07, "loss": 0.4183, "step": 13508 }, { "epoch": 1.72, "grad_norm": 0.6616614915556595, "learning_rate": 5.013120415976147e-07, "loss": 0.4565, "step": 13509 }, { "epoch": 1.72, "grad_norm": 0.7459842592788108, "learning_rate": 5.008619208520743e-07, "loss": 0.4986, "step": 13510 }, { "epoch": 1.72, "grad_norm": 0.9278804279838205, "learning_rate": 5.00411991620467e-07, "loss": 0.508, "step": 13511 }, { "epoch": 1.72, "grad_norm": 0.8221094224462332, "learning_rate": 4.999622539219417e-07, "loss": 0.5237, "step": 13512 }, { "epoch": 1.72, "grad_norm": 0.7273220734822718, "learning_rate": 4.995127077756439e-07, "loss": 0.4693, "step": 13513 }, { "epoch": 1.72, "grad_norm": 0.6772972411520407, "learning_rate": 4.990633532007078e-07, "loss": 0.44, "step": 13514 }, { "epoch": 1.72, "grad_norm": 0.6541062643443416, "learning_rate": 4.986141902162628e-07, "loss": 0.5324, "step": 13515 }, { "epoch": 1.72, "grad_norm": 0.7053463019454407, "learning_rate": 4.981652188414265e-07, "loss": 0.5148, "step": 13516 }, { "epoch": 1.72, "grad_norm": 0.5955620477931916, "learning_rate": 4.977164390953121e-07, "loss": 0.4892, "step": 13517 }, { "epoch": 1.72, "grad_norm": 0.7382496021454763, "learning_rate": 4.972678509970203e-07, "loss": 0.505, "step": 13518 }, { "epoch": 1.72, "grad_norm": 0.6784560396331419, "learning_rate": 4.968194545656485e-07, "loss": 0.5053, "step": 13519 }, { "epoch": 1.72, "grad_norm": 0.7766822457570377, "learning_rate": 4.963712498202817e-07, "loss": 0.4648, "step": 13520 }, { "epoch": 1.72, "grad_norm": 0.6516028871054869, "learning_rate": 4.959232367799999e-07, "loss": 0.5185, "step": 13521 }, { "epoch": 1.72, "grad_norm": 0.7089130839844244, "learning_rate": 4.954754154638719e-07, "loss": 0.5004, "step": 13522 }, { "epoch": 1.72, "grad_norm": 0.8122221524791091, "learning_rate": 4.950277858909614e-07, "loss": 0.5048, "step": 13523 }, { "epoch": 1.72, "grad_norm": 0.6097800677025303, "learning_rate": 4.945803480803229e-07, "loss": 0.4899, "step": 13524 }, { "epoch": 1.72, "grad_norm": 0.5677240840312868, "learning_rate": 4.94133102051001e-07, "loss": 0.4456, "step": 13525 }, { "epoch": 1.72, "grad_norm": 0.7117314217370676, "learning_rate": 4.936860478220351e-07, "loss": 0.475, "step": 13526 }, { "epoch": 1.72, "grad_norm": 0.7596848138537763, "learning_rate": 4.93239185412453e-07, "loss": 0.4915, "step": 13527 }, { "epoch": 1.72, "grad_norm": 0.5918251609758632, "learning_rate": 4.927925148412782e-07, "loss": 0.4784, "step": 13528 }, { "epoch": 1.72, "grad_norm": 0.7191352130736824, "learning_rate": 4.923460361275224e-07, "loss": 0.5083, "step": 13529 }, { "epoch": 1.72, "grad_norm": 0.6775535207770718, "learning_rate": 4.918997492901917e-07, "loss": 0.4763, "step": 13530 }, { "epoch": 1.72, "grad_norm": 1.0275526216187507, "learning_rate": 4.914536543482823e-07, "loss": 0.5076, "step": 13531 }, { "epoch": 1.72, "grad_norm": 0.7589829586955565, "learning_rate": 4.910077513207845e-07, "loss": 0.5334, "step": 13532 }, { "epoch": 1.72, "grad_norm": 0.7363545842338399, "learning_rate": 4.905620402266775e-07, "loss": 0.5198, "step": 13533 }, { "epoch": 1.72, "grad_norm": 0.771284617343047, "learning_rate": 4.90116521084934e-07, "loss": 0.5261, "step": 13534 }, { "epoch": 1.72, "grad_norm": 0.8323167088875298, "learning_rate": 4.896711939145188e-07, "loss": 0.6018, "step": 13535 }, { "epoch": 1.72, "grad_norm": 0.6935884826232217, "learning_rate": 4.892260587343889e-07, "loss": 0.5004, "step": 13536 }, { "epoch": 1.72, "grad_norm": 0.6159611784103374, "learning_rate": 4.887811155634897e-07, "loss": 0.4058, "step": 13537 }, { "epoch": 1.72, "grad_norm": 0.753861121494902, "learning_rate": 4.883363644207645e-07, "loss": 0.4452, "step": 13538 }, { "epoch": 1.72, "grad_norm": 0.6338400041600465, "learning_rate": 4.878918053251414e-07, "loss": 0.45, "step": 13539 }, { "epoch": 1.72, "grad_norm": 0.8067653171597988, "learning_rate": 4.874474382955469e-07, "loss": 0.55, "step": 13540 }, { "epoch": 1.73, "grad_norm": 0.6001878534015269, "learning_rate": 4.870032633508936e-07, "loss": 0.454, "step": 13541 }, { "epoch": 1.73, "grad_norm": 0.597975842562331, "learning_rate": 4.865592805100911e-07, "loss": 0.473, "step": 13542 }, { "epoch": 1.73, "grad_norm": 0.7239541796475676, "learning_rate": 4.861154897920361e-07, "loss": 0.5008, "step": 13543 }, { "epoch": 1.73, "grad_norm": 0.7159151955613364, "learning_rate": 4.856718912156216e-07, "loss": 0.5214, "step": 13544 }, { "epoch": 1.73, "grad_norm": 0.7927731341169386, "learning_rate": 4.85228484799728e-07, "loss": 0.5502, "step": 13545 }, { "epoch": 1.73, "grad_norm": 0.6654744144387269, "learning_rate": 4.847852705632311e-07, "loss": 0.4802, "step": 13546 }, { "epoch": 1.73, "grad_norm": 0.7219506219806303, "learning_rate": 4.843422485249955e-07, "loss": 0.5198, "step": 13547 }, { "epoch": 1.73, "grad_norm": 0.8044924443974449, "learning_rate": 4.838994187038815e-07, "loss": 0.5706, "step": 13548 }, { "epoch": 1.73, "grad_norm": 0.6669489596975234, "learning_rate": 4.834567811187374e-07, "loss": 0.4919, "step": 13549 }, { "epoch": 1.73, "grad_norm": 0.7365647121709059, "learning_rate": 4.83014335788406e-07, "loss": 0.4794, "step": 13550 }, { "epoch": 1.73, "grad_norm": 0.6393036687338078, "learning_rate": 4.825720827317193e-07, "loss": 0.4468, "step": 13551 }, { "epoch": 1.73, "grad_norm": 0.6792548326390625, "learning_rate": 4.821300219675046e-07, "loss": 0.4441, "step": 13552 }, { "epoch": 1.73, "grad_norm": 0.6067360957198411, "learning_rate": 4.816881535145767e-07, "loss": 0.4178, "step": 13553 }, { "epoch": 1.73, "grad_norm": 0.5782906973373195, "learning_rate": 4.812464773917464e-07, "loss": 0.4307, "step": 13554 }, { "epoch": 1.73, "grad_norm": 0.6042960632234778, "learning_rate": 4.808049936178127e-07, "loss": 0.4271, "step": 13555 }, { "epoch": 1.73, "grad_norm": 0.5991060298994619, "learning_rate": 4.803637022115703e-07, "loss": 0.5052, "step": 13556 }, { "epoch": 1.73, "grad_norm": 0.6195954718110437, "learning_rate": 4.799226031918014e-07, "loss": 0.4403, "step": 13557 }, { "epoch": 1.73, "grad_norm": 0.6935639288489386, "learning_rate": 4.794816965772842e-07, "loss": 0.514, "step": 13558 }, { "epoch": 1.73, "grad_norm": 0.6621847486999831, "learning_rate": 4.790409823867848e-07, "loss": 0.4816, "step": 13559 }, { "epoch": 1.73, "grad_norm": 0.6914010151862571, "learning_rate": 4.786004606390637e-07, "loss": 0.4728, "step": 13560 }, { "epoch": 1.73, "grad_norm": 0.6674388660113114, "learning_rate": 4.781601313528728e-07, "loss": 0.4438, "step": 13561 }, { "epoch": 1.73, "grad_norm": 0.6517763825662002, "learning_rate": 4.777199945469557e-07, "loss": 0.4506, "step": 13562 }, { "epoch": 1.73, "grad_norm": 0.6677684294459226, "learning_rate": 4.77280050240047e-07, "loss": 0.5224, "step": 13563 }, { "epoch": 1.73, "grad_norm": 0.8198693242720424, "learning_rate": 4.768402984508747e-07, "loss": 0.5089, "step": 13564 }, { "epoch": 1.73, "grad_norm": 0.6576908312494795, "learning_rate": 4.764007391981562e-07, "loss": 0.4799, "step": 13565 }, { "epoch": 1.73, "grad_norm": 0.6462429513844138, "learning_rate": 4.75961372500604e-07, "loss": 0.4252, "step": 13566 }, { "epoch": 1.73, "grad_norm": 0.6562531283304014, "learning_rate": 4.7552219837691817e-07, "loss": 0.4395, "step": 13567 }, { "epoch": 1.73, "grad_norm": 0.6730670611525732, "learning_rate": 4.750832168457953e-07, "loss": 0.5256, "step": 13568 }, { "epoch": 1.73, "grad_norm": 0.7268859756195081, "learning_rate": 4.746444279259188e-07, "loss": 0.5074, "step": 13569 }, { "epoch": 1.73, "grad_norm": 0.7001310500078968, "learning_rate": 4.742058316359699e-07, "loss": 0.5334, "step": 13570 }, { "epoch": 1.73, "grad_norm": 0.7354907873138025, "learning_rate": 4.7376742799461475e-07, "loss": 0.5058, "step": 13571 }, { "epoch": 1.73, "grad_norm": 0.6242665549811764, "learning_rate": 4.733292170205167e-07, "loss": 0.5066, "step": 13572 }, { "epoch": 1.73, "grad_norm": 0.7176781166711034, "learning_rate": 4.728911987323287e-07, "loss": 0.4649, "step": 13573 }, { "epoch": 1.73, "grad_norm": 0.7735815881153588, "learning_rate": 4.7245337314869635e-07, "loss": 0.4929, "step": 13574 }, { "epoch": 1.73, "grad_norm": 0.6185567361752308, "learning_rate": 4.7201574028825523e-07, "loss": 0.4459, "step": 13575 }, { "epoch": 1.73, "grad_norm": 0.6325301098605871, "learning_rate": 4.715783001696361e-07, "loss": 0.523, "step": 13576 }, { "epoch": 1.73, "grad_norm": 0.7156112527367392, "learning_rate": 4.7114105281145627e-07, "loss": 0.5099, "step": 13577 }, { "epoch": 1.73, "grad_norm": 0.6096909406249281, "learning_rate": 4.707039982323314e-07, "loss": 0.434, "step": 13578 }, { "epoch": 1.73, "grad_norm": 0.7971229333493872, "learning_rate": 4.7026713645086206e-07, "loss": 0.5119, "step": 13579 }, { "epoch": 1.73, "grad_norm": 0.5858950152961089, "learning_rate": 4.698304674856474e-07, "loss": 0.4681, "step": 13580 }, { "epoch": 1.73, "grad_norm": 0.639063402212429, "learning_rate": 4.6939399135527195e-07, "loss": 0.4754, "step": 13581 }, { "epoch": 1.73, "grad_norm": 0.6775712447483272, "learning_rate": 4.6895770807831755e-07, "loss": 0.4866, "step": 13582 }, { "epoch": 1.73, "grad_norm": 0.7334146926616699, "learning_rate": 4.6852161767335366e-07, "loss": 0.5794, "step": 13583 }, { "epoch": 1.73, "grad_norm": 0.7860587603644835, "learning_rate": 4.680857201589439e-07, "loss": 0.4878, "step": 13584 }, { "epoch": 1.73, "grad_norm": 0.6723821141180317, "learning_rate": 4.6765001555364396e-07, "loss": 0.4754, "step": 13585 }, { "epoch": 1.73, "grad_norm": 0.6758163906241748, "learning_rate": 4.6721450387599845e-07, "loss": 0.4668, "step": 13586 }, { "epoch": 1.73, "grad_norm": 0.8099471534284375, "learning_rate": 4.66779185144548e-07, "loss": 0.4691, "step": 13587 }, { "epoch": 1.73, "grad_norm": 0.6913333396137514, "learning_rate": 4.6634405937782014e-07, "loss": 0.5049, "step": 13588 }, { "epoch": 1.73, "grad_norm": 0.6550503497771092, "learning_rate": 4.6590912659433996e-07, "loss": 0.4891, "step": 13589 }, { "epoch": 1.73, "grad_norm": 0.6549204010572162, "learning_rate": 4.654743868126177e-07, "loss": 0.4519, "step": 13590 }, { "epoch": 1.73, "grad_norm": 0.5455458549016325, "learning_rate": 4.650398400511619e-07, "loss": 0.3784, "step": 13591 }, { "epoch": 1.73, "grad_norm": 0.5481318263913486, "learning_rate": 4.6460548632846716e-07, "loss": 0.3925, "step": 13592 }, { "epoch": 1.73, "grad_norm": 0.7071082099945568, "learning_rate": 4.6417132566302483e-07, "loss": 0.4863, "step": 13593 }, { "epoch": 1.73, "grad_norm": 0.769192734193263, "learning_rate": 4.63737358073314e-07, "loss": 0.523, "step": 13594 }, { "epoch": 1.73, "grad_norm": 0.8005694182314421, "learning_rate": 4.633035835778077e-07, "loss": 0.5482, "step": 13595 }, { "epoch": 1.73, "grad_norm": 0.7635386247055601, "learning_rate": 4.6287000219497224e-07, "loss": 0.5088, "step": 13596 }, { "epoch": 1.73, "grad_norm": 0.6720773434613845, "learning_rate": 4.6243661394326065e-07, "loss": 0.4836, "step": 13597 }, { "epoch": 1.73, "grad_norm": 0.907262325156144, "learning_rate": 4.6200341884112266e-07, "loss": 0.5027, "step": 13598 }, { "epoch": 1.73, "grad_norm": 0.7544457101168585, "learning_rate": 4.6157041690699843e-07, "loss": 0.5144, "step": 13599 }, { "epoch": 1.73, "grad_norm": 0.6754431194378012, "learning_rate": 4.611376081593183e-07, "loss": 0.4807, "step": 13600 }, { "epoch": 1.73, "grad_norm": 0.8511346670920159, "learning_rate": 4.6070499261650636e-07, "loss": 0.4982, "step": 13601 }, { "epoch": 1.73, "grad_norm": 0.6854371207577695, "learning_rate": 4.6027257029697683e-07, "loss": 0.4673, "step": 13602 }, { "epoch": 1.73, "grad_norm": 1.2245251641397126, "learning_rate": 4.598403412191382e-07, "loss": 0.5463, "step": 13603 }, { "epoch": 1.73, "grad_norm": 0.7728900346365459, "learning_rate": 4.5940830540138705e-07, "loss": 0.587, "step": 13604 }, { "epoch": 1.73, "grad_norm": 0.6295283021672786, "learning_rate": 4.589764628621152e-07, "loss": 0.4665, "step": 13605 }, { "epoch": 1.73, "grad_norm": 0.5825543419430843, "learning_rate": 4.585448136197035e-07, "loss": 0.4856, "step": 13606 }, { "epoch": 1.73, "grad_norm": 0.9423355624640816, "learning_rate": 4.5811335769252786e-07, "loss": 0.5555, "step": 13607 }, { "epoch": 1.73, "grad_norm": 0.6620348647994306, "learning_rate": 4.576820950989519e-07, "loss": 0.5258, "step": 13608 }, { "epoch": 1.73, "grad_norm": 0.9210313481731522, "learning_rate": 4.572510258573337e-07, "loss": 0.5013, "step": 13609 }, { "epoch": 1.73, "grad_norm": 0.5931712673968993, "learning_rate": 4.568201499860225e-07, "loss": 0.4736, "step": 13610 }, { "epoch": 1.73, "grad_norm": 0.7133475337454703, "learning_rate": 4.5638946750336087e-07, "loss": 0.5192, "step": 13611 }, { "epoch": 1.73, "grad_norm": 0.6005531807726122, "learning_rate": 4.5595897842767965e-07, "loss": 0.4496, "step": 13612 }, { "epoch": 1.73, "grad_norm": 0.7019352748328253, "learning_rate": 4.5552868277730476e-07, "loss": 0.4397, "step": 13613 }, { "epoch": 1.73, "grad_norm": 0.6248675031177257, "learning_rate": 4.5509858057055046e-07, "loss": 0.4761, "step": 13614 }, { "epoch": 1.73, "grad_norm": 0.7819824662023359, "learning_rate": 4.546686718257276e-07, "loss": 0.5218, "step": 13615 }, { "epoch": 1.73, "grad_norm": 0.7224126723161698, "learning_rate": 4.542389565611333e-07, "loss": 0.5272, "step": 13616 }, { "epoch": 1.73, "grad_norm": 0.8725884433584695, "learning_rate": 4.538094347950617e-07, "loss": 0.4815, "step": 13617 }, { "epoch": 1.73, "grad_norm": 0.6070487674554493, "learning_rate": 4.533801065457938e-07, "loss": 0.4616, "step": 13618 }, { "epoch": 1.74, "grad_norm": 0.6653619506450418, "learning_rate": 4.5295097183160664e-07, "loss": 0.4766, "step": 13619 }, { "epoch": 1.74, "grad_norm": 0.6603035315559673, "learning_rate": 4.525220306707656e-07, "loss": 0.517, "step": 13620 }, { "epoch": 1.74, "grad_norm": 0.647879453300088, "learning_rate": 4.520932830815311e-07, "loss": 0.5028, "step": 13621 }, { "epoch": 1.74, "grad_norm": 0.9087748814619964, "learning_rate": 4.516647290821502e-07, "loss": 0.5585, "step": 13622 }, { "epoch": 1.74, "grad_norm": 0.7305831251467169, "learning_rate": 4.5123636869086995e-07, "loss": 0.515, "step": 13623 }, { "epoch": 1.74, "grad_norm": 0.6955774296570529, "learning_rate": 4.5080820192592023e-07, "loss": 0.4654, "step": 13624 }, { "epoch": 1.74, "grad_norm": 0.684087760694511, "learning_rate": 4.5038022880552925e-07, "loss": 0.4888, "step": 13625 }, { "epoch": 1.74, "grad_norm": 0.6934702598117888, "learning_rate": 4.499524493479124e-07, "loss": 0.5112, "step": 13626 }, { "epoch": 1.74, "grad_norm": 0.8485619406309639, "learning_rate": 4.4952486357128124e-07, "loss": 0.5044, "step": 13627 }, { "epoch": 1.74, "grad_norm": 0.6589054935173704, "learning_rate": 4.49097471493834e-07, "loss": 0.4721, "step": 13628 }, { "epoch": 1.74, "grad_norm": 0.7036526025349592, "learning_rate": 4.486702731337661e-07, "loss": 0.4872, "step": 13629 }, { "epoch": 1.74, "grad_norm": 0.6770819160163123, "learning_rate": 4.482432685092597e-07, "loss": 0.463, "step": 13630 }, { "epoch": 1.74, "grad_norm": 0.6604424563571408, "learning_rate": 4.4781645763849244e-07, "loss": 0.4673, "step": 13631 }, { "epoch": 1.74, "grad_norm": 0.7590386111059159, "learning_rate": 4.4738984053963155e-07, "loss": 0.5426, "step": 13632 }, { "epoch": 1.74, "grad_norm": 0.8884841491381141, "learning_rate": 4.469634172308379e-07, "loss": 0.5215, "step": 13633 }, { "epoch": 1.74, "grad_norm": 0.75292188040192, "learning_rate": 4.4653718773026044e-07, "loss": 0.5274, "step": 13634 }, { "epoch": 1.74, "grad_norm": 0.7955425104998192, "learning_rate": 4.461111520560457e-07, "loss": 0.523, "step": 13635 }, { "epoch": 1.74, "grad_norm": 0.606892175257275, "learning_rate": 4.456853102263259e-07, "loss": 0.4301, "step": 13636 }, { "epoch": 1.74, "grad_norm": 0.7444793890261514, "learning_rate": 4.4525966225923035e-07, "loss": 0.5213, "step": 13637 }, { "epoch": 1.74, "grad_norm": 0.7884549510097256, "learning_rate": 4.4483420817287515e-07, "loss": 0.5114, "step": 13638 }, { "epoch": 1.74, "grad_norm": 0.633017352352882, "learning_rate": 4.4440894798537194e-07, "loss": 0.4461, "step": 13639 }, { "epoch": 1.74, "grad_norm": 0.7876208604858294, "learning_rate": 4.439838817148218e-07, "loss": 0.515, "step": 13640 }, { "epoch": 1.74, "grad_norm": 0.7343144756737766, "learning_rate": 4.435590093793196e-07, "loss": 0.5118, "step": 13641 }, { "epoch": 1.74, "grad_norm": 0.8422219860572921, "learning_rate": 4.431343309969488e-07, "loss": 0.5213, "step": 13642 }, { "epoch": 1.74, "grad_norm": 0.8218834595043661, "learning_rate": 4.427098465857893e-07, "loss": 0.5333, "step": 13643 }, { "epoch": 1.74, "grad_norm": 0.7872571675722545, "learning_rate": 4.4228555616390776e-07, "loss": 0.4515, "step": 13644 }, { "epoch": 1.74, "grad_norm": 0.6231678005200024, "learning_rate": 4.4186145974936646e-07, "loss": 0.501, "step": 13645 }, { "epoch": 1.74, "grad_norm": 0.7750746986514648, "learning_rate": 4.414375573602159e-07, "loss": 0.5552, "step": 13646 }, { "epoch": 1.74, "grad_norm": 0.8979860320457753, "learning_rate": 4.410138490145016e-07, "loss": 0.4781, "step": 13647 }, { "epoch": 1.74, "grad_norm": 0.6839099199031857, "learning_rate": 4.4059033473026035e-07, "loss": 0.4431, "step": 13648 }, { "epoch": 1.74, "grad_norm": 0.7067937114450181, "learning_rate": 4.401670145255177e-07, "loss": 0.5398, "step": 13649 }, { "epoch": 1.74, "grad_norm": 0.723041758838463, "learning_rate": 4.3974388841829527e-07, "loss": 0.4665, "step": 13650 }, { "epoch": 1.74, "grad_norm": 0.6398079613170673, "learning_rate": 4.3932095642660213e-07, "loss": 0.4731, "step": 13651 }, { "epoch": 1.74, "grad_norm": 2.793456418447509, "learning_rate": 4.3889821856844263e-07, "loss": 0.5175, "step": 13652 }, { "epoch": 1.74, "grad_norm": 0.5773375416035301, "learning_rate": 4.384756748618102e-07, "loss": 0.4409, "step": 13653 }, { "epoch": 1.74, "grad_norm": 0.5901188096602895, "learning_rate": 4.3805332532469216e-07, "loss": 0.4669, "step": 13654 }, { "epoch": 1.74, "grad_norm": 0.5898743966453424, "learning_rate": 4.376311699750657e-07, "loss": 0.485, "step": 13655 }, { "epoch": 1.74, "grad_norm": 0.6686818133344211, "learning_rate": 4.3720920883090154e-07, "loss": 0.5457, "step": 13656 }, { "epoch": 1.74, "grad_norm": 0.6971127388813682, "learning_rate": 4.367874419101603e-07, "loss": 0.4813, "step": 13657 }, { "epoch": 1.74, "grad_norm": 0.6528132602450198, "learning_rate": 4.363658692307954e-07, "loss": 0.4737, "step": 13658 }, { "epoch": 1.74, "grad_norm": 0.7259540859912853, "learning_rate": 4.359444908107524e-07, "loss": 0.5226, "step": 13659 }, { "epoch": 1.74, "grad_norm": 0.6275523484444302, "learning_rate": 4.355233066679687e-07, "loss": 0.4296, "step": 13660 }, { "epoch": 1.74, "grad_norm": 0.5972113420592744, "learning_rate": 4.3510231682037106e-07, "loss": 0.3939, "step": 13661 }, { "epoch": 1.74, "grad_norm": 0.6165715232087895, "learning_rate": 4.346815212858813e-07, "loss": 0.5348, "step": 13662 }, { "epoch": 1.74, "grad_norm": 0.7363532202243693, "learning_rate": 4.342609200824094e-07, "loss": 0.5432, "step": 13663 }, { "epoch": 1.74, "grad_norm": 0.7430287274215338, "learning_rate": 4.3384051322786116e-07, "loss": 0.5144, "step": 13664 }, { "epoch": 1.74, "grad_norm": 0.6130310463631212, "learning_rate": 4.3342030074013e-07, "loss": 0.4774, "step": 13665 }, { "epoch": 1.74, "grad_norm": 0.6394758565750073, "learning_rate": 4.3300028263710434e-07, "loss": 0.4469, "step": 13666 }, { "epoch": 1.74, "grad_norm": 0.6900451677181542, "learning_rate": 4.325804589366622e-07, "loss": 0.4599, "step": 13667 }, { "epoch": 1.74, "grad_norm": 0.607977632757067, "learning_rate": 4.321608296566754e-07, "loss": 0.4328, "step": 13668 }, { "epoch": 1.74, "grad_norm": 0.6017683240864804, "learning_rate": 4.3174139481500457e-07, "loss": 0.4796, "step": 13669 }, { "epoch": 1.74, "grad_norm": 0.6221075031004839, "learning_rate": 4.313221544295054e-07, "loss": 0.4837, "step": 13670 }, { "epoch": 1.74, "grad_norm": 0.6882749835797493, "learning_rate": 4.3090310851802097e-07, "loss": 0.5064, "step": 13671 }, { "epoch": 1.74, "grad_norm": 0.6958970308556817, "learning_rate": 4.3048425709839194e-07, "loss": 0.4905, "step": 13672 }, { "epoch": 1.74, "grad_norm": 0.620610737644033, "learning_rate": 4.300656001884451e-07, "loss": 0.4537, "step": 13673 }, { "epoch": 1.74, "grad_norm": 0.6388890546587489, "learning_rate": 4.2964713780600355e-07, "loss": 0.4602, "step": 13674 }, { "epoch": 1.74, "grad_norm": 0.6500204957518717, "learning_rate": 4.292288699688774e-07, "loss": 0.4823, "step": 13675 }, { "epoch": 1.74, "grad_norm": 0.9346041359252649, "learning_rate": 4.288107966948729e-07, "loss": 0.4992, "step": 13676 }, { "epoch": 1.74, "grad_norm": 0.7245657535589, "learning_rate": 4.2839291800178484e-07, "loss": 0.5014, "step": 13677 }, { "epoch": 1.74, "grad_norm": 0.6161390363755728, "learning_rate": 4.2797523390740227e-07, "loss": 0.4836, "step": 13678 }, { "epoch": 1.74, "grad_norm": 0.6944588003800906, "learning_rate": 4.275577444295026e-07, "loss": 0.4965, "step": 13679 }, { "epoch": 1.74, "grad_norm": 0.6459201801821535, "learning_rate": 4.2714044958585943e-07, "loss": 0.4501, "step": 13680 }, { "epoch": 1.74, "grad_norm": 0.6323875502268382, "learning_rate": 4.267233493942341e-07, "loss": 0.4404, "step": 13681 }, { "epoch": 1.74, "grad_norm": 0.7229272929413048, "learning_rate": 4.263064438723824e-07, "loss": 0.4848, "step": 13682 }, { "epoch": 1.74, "grad_norm": 0.7233122432409727, "learning_rate": 4.258897330380485e-07, "loss": 0.5184, "step": 13683 }, { "epoch": 1.74, "grad_norm": 0.7704476090269072, "learning_rate": 4.254732169089726e-07, "loss": 0.5431, "step": 13684 }, { "epoch": 1.74, "grad_norm": 0.5844436938787214, "learning_rate": 4.2505689550288334e-07, "loss": 0.4574, "step": 13685 }, { "epoch": 1.74, "grad_norm": 0.6036896196576108, "learning_rate": 4.246407688375037e-07, "loss": 0.463, "step": 13686 }, { "epoch": 1.74, "grad_norm": 0.8239299290238078, "learning_rate": 4.242248369305446e-07, "loss": 0.4789, "step": 13687 }, { "epoch": 1.74, "grad_norm": 0.6411890599637349, "learning_rate": 4.238090997997135e-07, "loss": 0.5154, "step": 13688 }, { "epoch": 1.74, "grad_norm": 0.7600743982771705, "learning_rate": 4.2339355746270405e-07, "loss": 0.4997, "step": 13689 }, { "epoch": 1.74, "grad_norm": 0.6155163982047241, "learning_rate": 4.229782099372076e-07, "loss": 0.4541, "step": 13690 }, { "epoch": 1.74, "grad_norm": 0.8067969032953197, "learning_rate": 4.2256305724090116e-07, "loss": 0.4887, "step": 13691 }, { "epoch": 1.74, "grad_norm": 0.9053404096499114, "learning_rate": 4.221480993914595e-07, "loss": 0.4849, "step": 13692 }, { "epoch": 1.74, "grad_norm": 0.5963007834544024, "learning_rate": 4.2173333640654345e-07, "loss": 0.4345, "step": 13693 }, { "epoch": 1.74, "grad_norm": 0.7254760136086235, "learning_rate": 4.2131876830381e-07, "loss": 0.4525, "step": 13694 }, { "epoch": 1.74, "grad_norm": 0.5642448866663942, "learning_rate": 4.209043951009045e-07, "loss": 0.4297, "step": 13695 }, { "epoch": 1.74, "grad_norm": 0.6365882738171745, "learning_rate": 4.204902168154662e-07, "loss": 0.4469, "step": 13696 }, { "epoch": 1.74, "grad_norm": 0.5952408281324886, "learning_rate": 4.2007623346512536e-07, "loss": 0.3731, "step": 13697 }, { "epoch": 1.75, "grad_norm": 0.6454747602968248, "learning_rate": 4.196624450675046e-07, "loss": 0.4993, "step": 13698 }, { "epoch": 1.75, "grad_norm": 0.7339855605335405, "learning_rate": 4.19248851640216e-07, "loss": 0.4945, "step": 13699 }, { "epoch": 1.75, "grad_norm": 0.7963651255244028, "learning_rate": 4.188354532008665e-07, "loss": 0.5153, "step": 13700 }, { "epoch": 1.75, "grad_norm": 0.7567747382866997, "learning_rate": 4.1842224976705204e-07, "loss": 0.4739, "step": 13701 }, { "epoch": 1.75, "grad_norm": 0.7420194266112299, "learning_rate": 4.1800924135636245e-07, "loss": 0.4615, "step": 13702 }, { "epoch": 1.75, "grad_norm": 0.7546858369074817, "learning_rate": 4.175964279863765e-07, "loss": 0.5049, "step": 13703 }, { "epoch": 1.75, "grad_norm": 0.7709134575114354, "learning_rate": 4.1718380967466834e-07, "loss": 0.5016, "step": 13704 }, { "epoch": 1.75, "grad_norm": 0.6968054713717695, "learning_rate": 4.167713864388001e-07, "loss": 0.4813, "step": 13705 }, { "epoch": 1.75, "grad_norm": 0.724680961798452, "learning_rate": 4.163591582963289e-07, "loss": 0.4825, "step": 13706 }, { "epoch": 1.75, "grad_norm": 0.6453766748894985, "learning_rate": 4.159471252648001e-07, "loss": 0.4155, "step": 13707 }, { "epoch": 1.75, "grad_norm": 0.5993152183775042, "learning_rate": 4.155352873617535e-07, "loss": 0.4815, "step": 13708 }, { "epoch": 1.75, "grad_norm": 0.7447826723468072, "learning_rate": 4.1512364460472076e-07, "loss": 0.5019, "step": 13709 }, { "epoch": 1.75, "grad_norm": 0.6692301355999161, "learning_rate": 4.147121970112228e-07, "loss": 0.4369, "step": 13710 }, { "epoch": 1.75, "grad_norm": 0.6640618145264051, "learning_rate": 4.1430094459877504e-07, "loss": 0.5311, "step": 13711 }, { "epoch": 1.75, "grad_norm": 0.7965957111920747, "learning_rate": 4.138898873848807e-07, "loss": 0.4689, "step": 13712 }, { "epoch": 1.75, "grad_norm": 0.6548630641529637, "learning_rate": 4.134790253870402e-07, "loss": 0.4631, "step": 13713 }, { "epoch": 1.75, "grad_norm": 0.6624990469178805, "learning_rate": 4.130683586227402e-07, "loss": 0.4521, "step": 13714 }, { "epoch": 1.75, "grad_norm": 0.6822369077692493, "learning_rate": 4.126578871094633e-07, "loss": 0.5256, "step": 13715 }, { "epoch": 1.75, "grad_norm": 0.700754836501631, "learning_rate": 4.1224761086468e-07, "loss": 0.5025, "step": 13716 }, { "epoch": 1.75, "grad_norm": 0.6866079764154067, "learning_rate": 4.1183752990585636e-07, "loss": 0.4572, "step": 13717 }, { "epoch": 1.75, "grad_norm": 0.6561329929973297, "learning_rate": 4.114276442504472e-07, "loss": 0.4923, "step": 13718 }, { "epoch": 1.75, "grad_norm": 0.6836084417555767, "learning_rate": 4.110179539158998e-07, "loss": 0.5155, "step": 13719 }, { "epoch": 1.75, "grad_norm": 0.780814920251793, "learning_rate": 4.106084589196546e-07, "loss": 0.5255, "step": 13720 }, { "epoch": 1.75, "grad_norm": 0.8088555870418664, "learning_rate": 4.10199159279141e-07, "loss": 0.5002, "step": 13721 }, { "epoch": 1.75, "grad_norm": 0.5760947505143988, "learning_rate": 4.097900550117817e-07, "loss": 0.4378, "step": 13722 }, { "epoch": 1.75, "grad_norm": 0.7219083926785813, "learning_rate": 4.0938114613499334e-07, "loss": 0.4509, "step": 13723 }, { "epoch": 1.75, "grad_norm": 0.5515768989594894, "learning_rate": 4.089724326661787e-07, "loss": 0.4111, "step": 13724 }, { "epoch": 1.75, "grad_norm": 0.5998512533553908, "learning_rate": 4.085639146227377e-07, "loss": 0.4647, "step": 13725 }, { "epoch": 1.75, "grad_norm": 0.7836438241522958, "learning_rate": 4.0815559202205755e-07, "loss": 0.5301, "step": 13726 }, { "epoch": 1.75, "grad_norm": 1.2177995984825085, "learning_rate": 4.0774746488152163e-07, "loss": 0.5037, "step": 13727 }, { "epoch": 1.75, "grad_norm": 0.7829486995066145, "learning_rate": 4.0733953321850093e-07, "loss": 0.5269, "step": 13728 }, { "epoch": 1.75, "grad_norm": 0.7454986961864011, "learning_rate": 4.0693179705036057e-07, "loss": 0.5117, "step": 13729 }, { "epoch": 1.75, "grad_norm": 0.6247043639830051, "learning_rate": 4.0652425639445546e-07, "loss": 0.476, "step": 13730 }, { "epoch": 1.75, "grad_norm": 0.7117828884186429, "learning_rate": 4.061169112681351e-07, "loss": 0.5639, "step": 13731 }, { "epoch": 1.75, "grad_norm": 0.693040137858548, "learning_rate": 4.0570976168873676e-07, "loss": 0.5613, "step": 13732 }, { "epoch": 1.75, "grad_norm": 0.7740960637119998, "learning_rate": 4.0530280767359266e-07, "loss": 0.546, "step": 13733 }, { "epoch": 1.75, "grad_norm": 0.7030624450584297, "learning_rate": 4.0489604924002557e-07, "loss": 0.4696, "step": 13734 }, { "epoch": 1.75, "grad_norm": 0.6304245884084819, "learning_rate": 4.0448948640535114e-07, "loss": 0.4256, "step": 13735 }, { "epoch": 1.75, "grad_norm": 0.7906876920890266, "learning_rate": 4.040831191868727e-07, "loss": 0.5866, "step": 13736 }, { "epoch": 1.75, "grad_norm": 0.6867288654803573, "learning_rate": 4.0367694760189035e-07, "loss": 0.5005, "step": 13737 }, { "epoch": 1.75, "grad_norm": 0.687169783857831, "learning_rate": 4.032709716676919e-07, "loss": 0.427, "step": 13738 }, { "epoch": 1.75, "grad_norm": 0.9342386369303214, "learning_rate": 4.0286519140155967e-07, "loss": 0.4972, "step": 13739 }, { "epoch": 1.75, "grad_norm": 0.5578095674858486, "learning_rate": 4.0245960682076536e-07, "loss": 0.4513, "step": 13740 }, { "epoch": 1.75, "grad_norm": 0.5913051826881541, "learning_rate": 4.0205421794257517e-07, "loss": 0.4306, "step": 13741 }, { "epoch": 1.75, "grad_norm": 0.6942305598239782, "learning_rate": 4.016490247842425e-07, "loss": 0.4452, "step": 13742 }, { "epoch": 1.75, "grad_norm": 0.6064039562061521, "learning_rate": 4.0124402736301747e-07, "loss": 0.4618, "step": 13743 }, { "epoch": 1.75, "grad_norm": 0.6716124941368985, "learning_rate": 4.00839225696138e-07, "loss": 0.4356, "step": 13744 }, { "epoch": 1.75, "grad_norm": 0.579501076833223, "learning_rate": 4.0043461980083685e-07, "loss": 0.4383, "step": 13745 }, { "epoch": 1.75, "grad_norm": 0.7520692366726704, "learning_rate": 4.000302096943343e-07, "loss": 0.5426, "step": 13746 }, { "epoch": 1.75, "grad_norm": 0.6763153231718183, "learning_rate": 3.996259953938475e-07, "loss": 0.4557, "step": 13747 }, { "epoch": 1.75, "grad_norm": 0.607247675542398, "learning_rate": 3.992219769165806e-07, "loss": 0.4605, "step": 13748 }, { "epoch": 1.75, "grad_norm": 0.7780340874291284, "learning_rate": 3.988181542797337e-07, "loss": 0.5103, "step": 13749 }, { "epoch": 1.75, "grad_norm": 0.6235415222783003, "learning_rate": 3.98414527500493e-07, "loss": 0.451, "step": 13750 }, { "epoch": 1.75, "grad_norm": 0.7000781969917278, "learning_rate": 3.9801109659604264e-07, "loss": 0.4658, "step": 13751 }, { "epoch": 1.75, "grad_norm": 0.6426205247715331, "learning_rate": 3.9760786158355323e-07, "loss": 0.4628, "step": 13752 }, { "epoch": 1.75, "grad_norm": 0.7139740553630304, "learning_rate": 3.9720482248019055e-07, "loss": 0.4502, "step": 13753 }, { "epoch": 1.75, "grad_norm": 0.7534575013088043, "learning_rate": 3.9680197930310914e-07, "loss": 0.5059, "step": 13754 }, { "epoch": 1.75, "grad_norm": 0.7965615861797232, "learning_rate": 3.9639933206945923e-07, "loss": 0.5029, "step": 13755 }, { "epoch": 1.75, "grad_norm": 0.7363888294805679, "learning_rate": 3.9599688079637763e-07, "loss": 0.5154, "step": 13756 }, { "epoch": 1.75, "grad_norm": 0.8457494239258391, "learning_rate": 3.955946255009974e-07, "loss": 0.5633, "step": 13757 }, { "epoch": 1.75, "grad_norm": 0.7813100769878324, "learning_rate": 3.9519256620043857e-07, "loss": 0.5325, "step": 13758 }, { "epoch": 1.75, "grad_norm": 0.5908650592062479, "learning_rate": 3.9479070291181864e-07, "loss": 0.4321, "step": 13759 }, { "epoch": 1.75, "grad_norm": 0.6773314565871914, "learning_rate": 3.943890356522423e-07, "loss": 0.5048, "step": 13760 }, { "epoch": 1.75, "grad_norm": 0.7499849403199816, "learning_rate": 3.939875644388075e-07, "loss": 0.4586, "step": 13761 }, { "epoch": 1.75, "grad_norm": 0.7999678490467199, "learning_rate": 3.9358628928860276e-07, "loss": 0.5535, "step": 13762 }, { "epoch": 1.75, "grad_norm": 0.8094025783233719, "learning_rate": 3.931852102187106e-07, "loss": 0.5146, "step": 13763 }, { "epoch": 1.75, "grad_norm": 0.6543209975052142, "learning_rate": 3.9278432724620175e-07, "loss": 0.4801, "step": 13764 }, { "epoch": 1.75, "grad_norm": 0.7716557946278838, "learning_rate": 3.9238364038814257e-07, "loss": 0.4681, "step": 13765 }, { "epoch": 1.75, "grad_norm": 0.7818789634563763, "learning_rate": 3.9198314966158724e-07, "loss": 0.5047, "step": 13766 }, { "epoch": 1.75, "grad_norm": 0.7183877558036207, "learning_rate": 3.9158285508358484e-07, "loss": 0.5273, "step": 13767 }, { "epoch": 1.75, "grad_norm": 0.6821154475129704, "learning_rate": 3.911827566711729e-07, "loss": 0.4535, "step": 13768 }, { "epoch": 1.75, "grad_norm": 0.5958626902068337, "learning_rate": 3.907828544413844e-07, "loss": 0.4287, "step": 13769 }, { "epoch": 1.75, "grad_norm": 0.608773197017208, "learning_rate": 3.903831484112397e-07, "loss": 0.4585, "step": 13770 }, { "epoch": 1.75, "grad_norm": 0.6931973030221442, "learning_rate": 3.8998363859775456e-07, "loss": 0.4603, "step": 13771 }, { "epoch": 1.75, "grad_norm": 0.789078298627952, "learning_rate": 3.8958432501793485e-07, "loss": 0.4818, "step": 13772 }, { "epoch": 1.75, "grad_norm": 0.6272419965990593, "learning_rate": 3.89185207688777e-07, "loss": 0.4401, "step": 13773 }, { "epoch": 1.75, "grad_norm": 0.5958803268848424, "learning_rate": 3.887862866272718e-07, "loss": 0.4898, "step": 13774 }, { "epoch": 1.75, "grad_norm": 0.7814193535081847, "learning_rate": 3.8838756185039794e-07, "loss": 0.5203, "step": 13775 }, { "epoch": 1.76, "grad_norm": 0.8217318685769777, "learning_rate": 3.879890333751296e-07, "loss": 0.5629, "step": 13776 }, { "epoch": 1.76, "grad_norm": 0.6705661485189707, "learning_rate": 3.875907012184299e-07, "loss": 0.5213, "step": 13777 }, { "epoch": 1.76, "grad_norm": 0.6821513413209003, "learning_rate": 3.871925653972552e-07, "loss": 0.4693, "step": 13778 }, { "epoch": 1.76, "grad_norm": 0.6807070769460901, "learning_rate": 3.867946259285521e-07, "loss": 0.4853, "step": 13779 }, { "epoch": 1.76, "grad_norm": 0.7004562600544044, "learning_rate": 3.8639688282926133e-07, "loss": 0.5498, "step": 13780 }, { "epoch": 1.76, "grad_norm": 0.6332331345223675, "learning_rate": 3.8599933611631056e-07, "loss": 0.4973, "step": 13781 }, { "epoch": 1.76, "grad_norm": 0.7312128088391664, "learning_rate": 3.8560198580662456e-07, "loss": 0.5531, "step": 13782 }, { "epoch": 1.76, "grad_norm": 0.717187337017471, "learning_rate": 3.852048319171159e-07, "loss": 0.4397, "step": 13783 }, { "epoch": 1.76, "grad_norm": 1.0632855048355536, "learning_rate": 3.8480787446469215e-07, "loss": 0.4085, "step": 13784 }, { "epoch": 1.76, "grad_norm": 0.8336400301722282, "learning_rate": 3.844111134662482e-07, "loss": 0.5799, "step": 13785 }, { "epoch": 1.76, "grad_norm": 0.9325812997782521, "learning_rate": 3.840145489386743e-07, "loss": 0.536, "step": 13786 }, { "epoch": 1.76, "grad_norm": 0.675767267353411, "learning_rate": 3.836181808988498e-07, "loss": 0.5048, "step": 13787 }, { "epoch": 1.76, "grad_norm": 0.6443761369742436, "learning_rate": 3.832220093636485e-07, "loss": 0.476, "step": 13788 }, { "epoch": 1.76, "grad_norm": 0.6339980698189236, "learning_rate": 3.828260343499324e-07, "loss": 0.4547, "step": 13789 }, { "epoch": 1.76, "grad_norm": 0.6909201741886078, "learning_rate": 3.824302558745585e-07, "loss": 0.4375, "step": 13790 }, { "epoch": 1.76, "grad_norm": 0.7616900502690089, "learning_rate": 3.8203467395437177e-07, "loss": 0.4534, "step": 13791 }, { "epoch": 1.76, "grad_norm": 0.6429206518602049, "learning_rate": 3.8163928860621315e-07, "loss": 0.3982, "step": 13792 }, { "epoch": 1.76, "grad_norm": 0.7019539647552694, "learning_rate": 3.812440998469108e-07, "loss": 0.5338, "step": 13793 }, { "epoch": 1.76, "grad_norm": 0.7186593738283963, "learning_rate": 3.8084910769328854e-07, "loss": 0.5187, "step": 13794 }, { "epoch": 1.76, "grad_norm": 0.8424899953442363, "learning_rate": 3.8045431216215733e-07, "loss": 0.5036, "step": 13795 }, { "epoch": 1.76, "grad_norm": 0.5504283834995684, "learning_rate": 3.80059713270326e-07, "loss": 0.4337, "step": 13796 }, { "epoch": 1.76, "grad_norm": 0.7318098436809256, "learning_rate": 3.796653110345888e-07, "loss": 0.4868, "step": 13797 }, { "epoch": 1.76, "grad_norm": 0.7359701594684694, "learning_rate": 3.7927110547173517e-07, "loss": 0.5208, "step": 13798 }, { "epoch": 1.76, "grad_norm": 0.6939532056663928, "learning_rate": 3.788770965985444e-07, "loss": 0.5088, "step": 13799 }, { "epoch": 1.76, "grad_norm": 0.6953817167791094, "learning_rate": 3.7848328443178927e-07, "loss": 0.4793, "step": 13800 }, { "epoch": 1.76, "grad_norm": 0.6124442769625502, "learning_rate": 3.7808966898823184e-07, "loss": 0.4455, "step": 13801 }, { "epoch": 1.76, "grad_norm": 0.6445953245322291, "learning_rate": 3.776962502846282e-07, "loss": 0.483, "step": 13802 }, { "epoch": 1.76, "grad_norm": 0.6943237138565285, "learning_rate": 3.773030283377238e-07, "loss": 0.5746, "step": 13803 }, { "epoch": 1.76, "grad_norm": 0.7399994289777894, "learning_rate": 3.769100031642581e-07, "loss": 0.5114, "step": 13804 }, { "epoch": 1.76, "grad_norm": 0.7171811472714346, "learning_rate": 3.7651717478095983e-07, "loss": 0.5124, "step": 13805 }, { "epoch": 1.76, "grad_norm": 0.6872308732726289, "learning_rate": 3.761245432045513e-07, "loss": 0.4232, "step": 13806 }, { "epoch": 1.76, "grad_norm": 0.6290784762151004, "learning_rate": 3.7573210845174455e-07, "loss": 0.4782, "step": 13807 }, { "epoch": 1.76, "grad_norm": 0.6773105966936759, "learning_rate": 3.7533987053924524e-07, "loss": 0.4852, "step": 13808 }, { "epoch": 1.76, "grad_norm": 0.5509217852059749, "learning_rate": 3.7494782948374885e-07, "loss": 0.4344, "step": 13809 }, { "epoch": 1.76, "grad_norm": 0.6860480399657908, "learning_rate": 3.7455598530194483e-07, "loss": 0.4761, "step": 13810 }, { "epoch": 1.76, "grad_norm": 0.6429629706390407, "learning_rate": 3.741643380105109e-07, "loss": 0.4758, "step": 13811 }, { "epoch": 1.76, "grad_norm": 0.8489862776966325, "learning_rate": 3.7377288762611927e-07, "loss": 0.5642, "step": 13812 }, { "epoch": 1.76, "grad_norm": 0.6174925634396603, "learning_rate": 3.733816341654323e-07, "loss": 0.4591, "step": 13813 }, { "epoch": 1.76, "grad_norm": 0.8021769585351551, "learning_rate": 3.7299057764510536e-07, "loss": 0.548, "step": 13814 }, { "epoch": 1.76, "grad_norm": 0.5954272571058505, "learning_rate": 3.7259971808178307e-07, "loss": 0.479, "step": 13815 }, { "epoch": 1.76, "grad_norm": 0.5904385686337086, "learning_rate": 3.722090554921043e-07, "loss": 0.4187, "step": 13816 }, { "epoch": 1.76, "grad_norm": 0.7287402602994608, "learning_rate": 3.7181858989269683e-07, "loss": 0.4731, "step": 13817 }, { "epoch": 1.76, "grad_norm": 0.7510899701612301, "learning_rate": 3.714283213001835e-07, "loss": 0.536, "step": 13818 }, { "epoch": 1.76, "grad_norm": 0.684249562978372, "learning_rate": 3.7103824973117485e-07, "loss": 0.4833, "step": 13819 }, { "epoch": 1.76, "grad_norm": 0.759377988431207, "learning_rate": 3.7064837520227606e-07, "loss": 0.567, "step": 13820 }, { "epoch": 1.76, "grad_norm": 0.7734765880513665, "learning_rate": 3.7025869773008207e-07, "loss": 0.5392, "step": 13821 }, { "epoch": 1.76, "grad_norm": 0.7545584022070637, "learning_rate": 3.6986921733118187e-07, "loss": 0.5008, "step": 13822 }, { "epoch": 1.76, "grad_norm": 0.7812127641909019, "learning_rate": 3.6947993402215274e-07, "loss": 0.5058, "step": 13823 }, { "epoch": 1.76, "grad_norm": 0.7584456243061589, "learning_rate": 3.690908478195665e-07, "loss": 0.5721, "step": 13824 }, { "epoch": 1.76, "grad_norm": 0.8423722656539723, "learning_rate": 3.6870195873998373e-07, "loss": 0.4697, "step": 13825 }, { "epoch": 1.76, "grad_norm": 0.6243203196531476, "learning_rate": 3.683132667999595e-07, "loss": 0.4522, "step": 13826 }, { "epoch": 1.76, "grad_norm": 0.5624407286307632, "learning_rate": 3.679247720160384e-07, "loss": 0.4532, "step": 13827 }, { "epoch": 1.76, "grad_norm": 0.7025893414609898, "learning_rate": 3.675364744047588e-07, "loss": 0.4491, "step": 13828 }, { "epoch": 1.76, "grad_norm": 0.6401947583134544, "learning_rate": 3.6714837398264703e-07, "loss": 0.469, "step": 13829 }, { "epoch": 1.76, "grad_norm": 0.696789946526359, "learning_rate": 3.6676047076622536e-07, "loss": 0.518, "step": 13830 }, { "epoch": 1.76, "grad_norm": 0.5477554682539626, "learning_rate": 3.663727647720039e-07, "loss": 0.412, "step": 13831 }, { "epoch": 1.76, "grad_norm": 0.6393558057986582, "learning_rate": 3.659852560164867e-07, "loss": 0.5441, "step": 13832 }, { "epoch": 1.76, "grad_norm": 0.8024265131290113, "learning_rate": 3.6559794451617e-07, "loss": 0.5597, "step": 13833 }, { "epoch": 1.76, "grad_norm": 0.7512095085261866, "learning_rate": 3.6521083028753843e-07, "loss": 0.5648, "step": 13834 }, { "epoch": 1.76, "grad_norm": 0.8152117396168237, "learning_rate": 3.6482391334707213e-07, "loss": 0.4635, "step": 13835 }, { "epoch": 1.76, "grad_norm": 0.6500850286388544, "learning_rate": 3.644371937112395e-07, "loss": 0.4584, "step": 13836 }, { "epoch": 1.76, "grad_norm": 0.7319380199062522, "learning_rate": 3.640506713965025e-07, "loss": 0.5025, "step": 13837 }, { "epoch": 1.76, "grad_norm": 0.5995843906252302, "learning_rate": 3.636643464193135e-07, "loss": 0.4375, "step": 13838 }, { "epoch": 1.76, "grad_norm": 0.6582242880021271, "learning_rate": 3.632782187961187e-07, "loss": 0.4632, "step": 13839 }, { "epoch": 1.76, "grad_norm": 0.6533705350749462, "learning_rate": 3.6289228854335224e-07, "loss": 0.4576, "step": 13840 }, { "epoch": 1.76, "grad_norm": 0.6828699511637626, "learning_rate": 3.6250655567744375e-07, "loss": 0.4934, "step": 13841 }, { "epoch": 1.76, "grad_norm": 0.6373699365084816, "learning_rate": 3.621210202148112e-07, "loss": 0.4467, "step": 13842 }, { "epoch": 1.76, "grad_norm": 0.6021769768564017, "learning_rate": 3.617356821718665e-07, "loss": 0.4316, "step": 13843 }, { "epoch": 1.76, "grad_norm": 0.7219729213128098, "learning_rate": 3.613505415650126e-07, "loss": 0.4937, "step": 13844 }, { "epoch": 1.76, "grad_norm": 0.6328240730370884, "learning_rate": 3.6096559841064195e-07, "loss": 0.4591, "step": 13845 }, { "epoch": 1.76, "grad_norm": 0.5861750828901557, "learning_rate": 3.60580852725142e-07, "loss": 0.4517, "step": 13846 }, { "epoch": 1.76, "grad_norm": 0.6561563932691945, "learning_rate": 3.6019630452489017e-07, "loss": 0.4669, "step": 13847 }, { "epoch": 1.76, "grad_norm": 0.7338394986836524, "learning_rate": 3.598119538262546e-07, "loss": 0.5293, "step": 13848 }, { "epoch": 1.76, "grad_norm": 0.6254198596463584, "learning_rate": 3.59427800645597e-07, "loss": 0.4306, "step": 13849 }, { "epoch": 1.76, "grad_norm": 0.554412062498456, "learning_rate": 3.5904384499926784e-07, "loss": 0.4357, "step": 13850 }, { "epoch": 1.76, "grad_norm": 0.6228996084352545, "learning_rate": 3.586600869036122e-07, "loss": 0.4519, "step": 13851 }, { "epoch": 1.76, "grad_norm": 0.6375354027239836, "learning_rate": 3.582765263749649e-07, "loss": 0.443, "step": 13852 }, { "epoch": 1.76, "grad_norm": 0.6430710309397137, "learning_rate": 3.578931634296534e-07, "loss": 0.4799, "step": 13853 }, { "epoch": 1.76, "grad_norm": 0.7655115171758119, "learning_rate": 3.5750999808399525e-07, "loss": 0.542, "step": 13854 }, { "epoch": 1.77, "grad_norm": 0.5924699486983114, "learning_rate": 3.571270303543023e-07, "loss": 0.4304, "step": 13855 }, { "epoch": 1.77, "grad_norm": 0.6195383273692429, "learning_rate": 3.567442602568738e-07, "loss": 0.4958, "step": 13856 }, { "epoch": 1.77, "grad_norm": 0.6800753344749414, "learning_rate": 3.563616878080045e-07, "loss": 0.4804, "step": 13857 }, { "epoch": 1.77, "grad_norm": 0.5934224436027983, "learning_rate": 3.559793130239797e-07, "loss": 0.5057, "step": 13858 }, { "epoch": 1.77, "grad_norm": 0.6850516527681244, "learning_rate": 3.555971359210758e-07, "loss": 0.5479, "step": 13859 }, { "epoch": 1.77, "grad_norm": 0.7789188408222252, "learning_rate": 3.552151565155598e-07, "loss": 0.5909, "step": 13860 }, { "epoch": 1.77, "grad_norm": 0.7642078739819534, "learning_rate": 3.548333748236926e-07, "loss": 0.5403, "step": 13861 }, { "epoch": 1.77, "grad_norm": 0.8298622778704081, "learning_rate": 3.54451790861724e-07, "loss": 0.4905, "step": 13862 }, { "epoch": 1.77, "grad_norm": 0.6495454339225492, "learning_rate": 3.540704046458987e-07, "loss": 0.4147, "step": 13863 }, { "epoch": 1.77, "grad_norm": 0.6649833792080664, "learning_rate": 3.536892161924488e-07, "loss": 0.441, "step": 13864 }, { "epoch": 1.77, "grad_norm": 0.6633690813063798, "learning_rate": 3.5330822551760293e-07, "loss": 0.4298, "step": 13865 }, { "epoch": 1.77, "grad_norm": 0.6449533432322812, "learning_rate": 3.529274326375759e-07, "loss": 0.4723, "step": 13866 }, { "epoch": 1.77, "grad_norm": 0.6183789603585188, "learning_rate": 3.525468375685792e-07, "loss": 0.4951, "step": 13867 }, { "epoch": 1.77, "grad_norm": 0.7498526260665723, "learning_rate": 3.52166440326811e-07, "loss": 0.5427, "step": 13868 }, { "epoch": 1.77, "grad_norm": 0.7901542125195323, "learning_rate": 3.517862409284667e-07, "loss": 0.5471, "step": 13869 }, { "epoch": 1.77, "grad_norm": 0.8856876802222856, "learning_rate": 3.5140623938972665e-07, "loss": 0.5452, "step": 13870 }, { "epoch": 1.77, "grad_norm": 0.7129854178866596, "learning_rate": 3.5102643572676954e-07, "loss": 0.4685, "step": 13871 }, { "epoch": 1.77, "grad_norm": 0.6633568120708002, "learning_rate": 3.506468299557608e-07, "loss": 0.5139, "step": 13872 }, { "epoch": 1.77, "grad_norm": 0.7011580444876366, "learning_rate": 3.502674220928598e-07, "loss": 0.4764, "step": 13873 }, { "epoch": 1.77, "grad_norm": 0.7310947879818189, "learning_rate": 3.498882121542152e-07, "loss": 0.5459, "step": 13874 }, { "epoch": 1.77, "grad_norm": 0.8334136699068064, "learning_rate": 3.4950920015597134e-07, "loss": 0.5213, "step": 13875 }, { "epoch": 1.77, "grad_norm": 0.576515777221524, "learning_rate": 3.4913038611425863e-07, "loss": 0.4526, "step": 13876 }, { "epoch": 1.77, "grad_norm": 0.6030032085253701, "learning_rate": 3.487517700452042e-07, "loss": 0.4329, "step": 13877 }, { "epoch": 1.77, "grad_norm": 0.6261802796133565, "learning_rate": 3.483733519649224e-07, "loss": 0.4621, "step": 13878 }, { "epoch": 1.77, "grad_norm": 0.695748287223935, "learning_rate": 3.4799513188952416e-07, "loss": 0.4212, "step": 13879 }, { "epoch": 1.77, "grad_norm": 0.6723713901357544, "learning_rate": 3.476171098351061e-07, "loss": 0.4727, "step": 13880 }, { "epoch": 1.77, "grad_norm": 0.6297596210156473, "learning_rate": 3.47239285817762e-07, "loss": 0.4673, "step": 13881 }, { "epoch": 1.77, "grad_norm": 0.5828960801551389, "learning_rate": 3.4686165985357125e-07, "loss": 0.4065, "step": 13882 }, { "epoch": 1.77, "grad_norm": 0.5657300075515225, "learning_rate": 3.4648423195861256e-07, "loss": 0.42, "step": 13883 }, { "epoch": 1.77, "grad_norm": 0.7336804685654024, "learning_rate": 3.461070021489482e-07, "loss": 0.4773, "step": 13884 }, { "epoch": 1.77, "grad_norm": 0.8165195365837692, "learning_rate": 3.457299704406386e-07, "loss": 0.55, "step": 13885 }, { "epoch": 1.77, "grad_norm": 0.749137935184494, "learning_rate": 3.453531368497298e-07, "loss": 0.4917, "step": 13886 }, { "epoch": 1.77, "grad_norm": 0.7880036505664922, "learning_rate": 3.449765013922646e-07, "loss": 0.5054, "step": 13887 }, { "epoch": 1.77, "grad_norm": 0.6857937744287524, "learning_rate": 3.446000640842739e-07, "loss": 0.4207, "step": 13888 }, { "epoch": 1.77, "grad_norm": 0.6372098873114977, "learning_rate": 3.4422382494178284e-07, "loss": 0.5339, "step": 13889 }, { "epoch": 1.77, "grad_norm": 0.7557361811569068, "learning_rate": 3.4384778398080465e-07, "loss": 0.5668, "step": 13890 }, { "epoch": 1.77, "grad_norm": 0.8243984972921393, "learning_rate": 3.4347194121734875e-07, "loss": 0.5109, "step": 13891 }, { "epoch": 1.77, "grad_norm": 0.7462360622330714, "learning_rate": 3.430962966674106e-07, "loss": 0.4978, "step": 13892 }, { "epoch": 1.77, "grad_norm": 0.7868313266217953, "learning_rate": 3.427208503469831e-07, "loss": 0.4861, "step": 13893 }, { "epoch": 1.77, "grad_norm": 0.6387401637329191, "learning_rate": 3.423456022720456e-07, "loss": 0.4331, "step": 13894 }, { "epoch": 1.77, "grad_norm": 0.6650582331883379, "learning_rate": 3.41970552458572e-07, "loss": 0.4624, "step": 13895 }, { "epoch": 1.77, "grad_norm": 0.8034009422785001, "learning_rate": 3.4159570092252794e-07, "loss": 0.5843, "step": 13896 }, { "epoch": 1.77, "grad_norm": 0.8057105592708022, "learning_rate": 3.4122104767986773e-07, "loss": 0.4945, "step": 13897 }, { "epoch": 1.77, "grad_norm": 0.5596908149140131, "learning_rate": 3.4084659274654155e-07, "loss": 0.3782, "step": 13898 }, { "epoch": 1.77, "grad_norm": 0.6772959665058292, "learning_rate": 3.4047233613848595e-07, "loss": 0.4354, "step": 13899 }, { "epoch": 1.77, "grad_norm": 1.032361039670238, "learning_rate": 3.400982778716344e-07, "loss": 0.4821, "step": 13900 }, { "epoch": 1.77, "grad_norm": 0.6565348550119008, "learning_rate": 3.3972441796190746e-07, "loss": 0.4633, "step": 13901 }, { "epoch": 1.77, "grad_norm": 0.7783803234167097, "learning_rate": 3.393507564252207e-07, "loss": 0.5265, "step": 13902 }, { "epoch": 1.77, "grad_norm": 0.7349765947132687, "learning_rate": 3.3897729327747763e-07, "loss": 0.5264, "step": 13903 }, { "epoch": 1.77, "grad_norm": 0.6931161287055484, "learning_rate": 3.386040285345776e-07, "loss": 0.4967, "step": 13904 }, { "epoch": 1.77, "grad_norm": 0.6826683587433691, "learning_rate": 3.382309622124086e-07, "loss": 0.4655, "step": 13905 }, { "epoch": 1.77, "grad_norm": 0.6079940397089266, "learning_rate": 3.3785809432685e-07, "loss": 0.4178, "step": 13906 }, { "epoch": 1.77, "grad_norm": 0.729937165894216, "learning_rate": 3.3748542489377425e-07, "loss": 0.5002, "step": 13907 }, { "epoch": 1.77, "grad_norm": 0.7559564674709615, "learning_rate": 3.371129539290452e-07, "loss": 0.5085, "step": 13908 }, { "epoch": 1.77, "grad_norm": 0.6910279233589397, "learning_rate": 3.367406814485169e-07, "loss": 0.5158, "step": 13909 }, { "epoch": 1.77, "grad_norm": 0.7289330692797645, "learning_rate": 3.363686074680367e-07, "loss": 0.5124, "step": 13910 }, { "epoch": 1.77, "grad_norm": 0.6610475981492554, "learning_rate": 3.359967320034413e-07, "loss": 0.5435, "step": 13911 }, { "epoch": 1.77, "grad_norm": 0.7431557845335063, "learning_rate": 3.35625055070562e-07, "loss": 0.5376, "step": 13912 }, { "epoch": 1.77, "grad_norm": 0.5799827380153265, "learning_rate": 3.352535766852177e-07, "loss": 0.4497, "step": 13913 }, { "epoch": 1.77, "grad_norm": 0.6998593579425042, "learning_rate": 3.3488229686322314e-07, "loss": 0.5036, "step": 13914 }, { "epoch": 1.77, "grad_norm": 0.7670174970087149, "learning_rate": 3.3451121562038113e-07, "loss": 0.4399, "step": 13915 }, { "epoch": 1.77, "grad_norm": 0.5750503104522482, "learning_rate": 3.3414033297248847e-07, "loss": 0.4487, "step": 13916 }, { "epoch": 1.77, "grad_norm": 0.6453345311668571, "learning_rate": 3.3376964893533094e-07, "loss": 0.4351, "step": 13917 }, { "epoch": 1.77, "grad_norm": 0.6428978620592785, "learning_rate": 3.3339916352468973e-07, "loss": 0.4607, "step": 13918 }, { "epoch": 1.77, "grad_norm": 0.690796333590503, "learning_rate": 3.3302887675633166e-07, "loss": 0.5178, "step": 13919 }, { "epoch": 1.77, "grad_norm": 0.7362428342767701, "learning_rate": 3.326587886460225e-07, "loss": 0.5048, "step": 13920 }, { "epoch": 1.77, "grad_norm": 0.6961942731584494, "learning_rate": 3.32288899209513e-07, "loss": 0.4863, "step": 13921 }, { "epoch": 1.77, "grad_norm": 0.7863034174318436, "learning_rate": 3.3191920846255057e-07, "loss": 0.4891, "step": 13922 }, { "epoch": 1.77, "grad_norm": 1.4195098242739392, "learning_rate": 3.315497164208692e-07, "loss": 0.5463, "step": 13923 }, { "epoch": 1.77, "grad_norm": 0.8328250624637004, "learning_rate": 3.311804231001986e-07, "loss": 0.5589, "step": 13924 }, { "epoch": 1.77, "grad_norm": 0.7440588269308244, "learning_rate": 3.308113285162579e-07, "loss": 0.5626, "step": 13925 }, { "epoch": 1.77, "grad_norm": 0.8836272365565927, "learning_rate": 3.304424326847588e-07, "loss": 0.5001, "step": 13926 }, { "epoch": 1.77, "grad_norm": 0.7305244183483356, "learning_rate": 3.300737356214029e-07, "loss": 0.5075, "step": 13927 }, { "epoch": 1.77, "grad_norm": 0.6253911133325396, "learning_rate": 3.297052373418858e-07, "loss": 0.5077, "step": 13928 }, { "epoch": 1.77, "grad_norm": 0.6892276228134869, "learning_rate": 3.2933693786189156e-07, "loss": 0.5158, "step": 13929 }, { "epoch": 1.77, "grad_norm": 0.6337368642721258, "learning_rate": 3.289688371970995e-07, "loss": 0.4683, "step": 13930 }, { "epoch": 1.77, "grad_norm": 0.6366908351920336, "learning_rate": 3.2860093536317694e-07, "loss": 0.4714, "step": 13931 }, { "epoch": 1.77, "grad_norm": 0.6754083834133615, "learning_rate": 3.2823323237578473e-07, "loss": 0.5295, "step": 13932 }, { "epoch": 1.78, "grad_norm": 0.6987791527821199, "learning_rate": 3.2786572825057537e-07, "loss": 0.5007, "step": 13933 }, { "epoch": 1.78, "grad_norm": 0.66489382690245, "learning_rate": 3.2749842300319245e-07, "loss": 0.4955, "step": 13934 }, { "epoch": 1.78, "grad_norm": 0.8462268115227372, "learning_rate": 3.2713131664927013e-07, "loss": 0.4763, "step": 13935 }, { "epoch": 1.78, "grad_norm": 0.557540381387369, "learning_rate": 3.267644092044353e-07, "loss": 0.3954, "step": 13936 }, { "epoch": 1.78, "grad_norm": 0.641071792044636, "learning_rate": 3.2639770068430556e-07, "loss": 0.4113, "step": 13937 }, { "epoch": 1.78, "grad_norm": 0.6571592495062913, "learning_rate": 3.260311911044922e-07, "loss": 0.4277, "step": 13938 }, { "epoch": 1.78, "grad_norm": 0.6268875305966175, "learning_rate": 3.25664880480594e-07, "loss": 0.4937, "step": 13939 }, { "epoch": 1.78, "grad_norm": 0.5675852997103183, "learning_rate": 3.2529876882820553e-07, "loss": 0.4431, "step": 13940 }, { "epoch": 1.78, "grad_norm": 0.5959995921160247, "learning_rate": 3.2493285616291003e-07, "loss": 0.4353, "step": 13941 }, { "epoch": 1.78, "grad_norm": 0.7694463261577236, "learning_rate": 3.2456714250028387e-07, "loss": 0.5533, "step": 13942 }, { "epoch": 1.78, "grad_norm": 0.7662108469221817, "learning_rate": 3.2420162785589294e-07, "loss": 0.528, "step": 13943 }, { "epoch": 1.78, "grad_norm": 0.5599884285733412, "learning_rate": 3.2383631224529697e-07, "loss": 0.4038, "step": 13944 }, { "epoch": 1.78, "grad_norm": 0.6904560120952674, "learning_rate": 3.234711956840464e-07, "loss": 0.491, "step": 13945 }, { "epoch": 1.78, "grad_norm": 0.7752669660395782, "learning_rate": 3.231062781876843e-07, "loss": 0.5076, "step": 13946 }, { "epoch": 1.78, "grad_norm": 0.9706912246277095, "learning_rate": 3.227415597717415e-07, "loss": 0.5598, "step": 13947 }, { "epoch": 1.78, "grad_norm": 0.7024225533980353, "learning_rate": 3.2237704045174455e-07, "loss": 0.4365, "step": 13948 }, { "epoch": 1.78, "grad_norm": 0.6092978119496261, "learning_rate": 3.2201272024320886e-07, "loss": 0.4635, "step": 13949 }, { "epoch": 1.78, "grad_norm": 0.6476506697431429, "learning_rate": 3.2164859916164414e-07, "loss": 0.4623, "step": 13950 }, { "epoch": 1.78, "grad_norm": 0.6699698809065268, "learning_rate": 3.2128467722254753e-07, "loss": 0.4713, "step": 13951 }, { "epoch": 1.78, "grad_norm": 0.7127765002574191, "learning_rate": 3.209209544414121e-07, "loss": 0.5446, "step": 13952 }, { "epoch": 1.78, "grad_norm": 0.7597612841562652, "learning_rate": 3.205574308337189e-07, "loss": 0.4478, "step": 13953 }, { "epoch": 1.78, "grad_norm": 0.6701725170217393, "learning_rate": 3.2019410641494273e-07, "loss": 0.5156, "step": 13954 }, { "epoch": 1.78, "grad_norm": 0.700155013596913, "learning_rate": 3.1983098120054843e-07, "loss": 0.5175, "step": 13955 }, { "epoch": 1.78, "grad_norm": 0.7469307952852051, "learning_rate": 3.194680552059937e-07, "loss": 0.5006, "step": 13956 }, { "epoch": 1.78, "grad_norm": 0.5915561934657525, "learning_rate": 3.191053284467277e-07, "loss": 0.4576, "step": 13957 }, { "epoch": 1.78, "grad_norm": 0.8386925271346305, "learning_rate": 3.187428009381893e-07, "loss": 0.6131, "step": 13958 }, { "epoch": 1.78, "grad_norm": 0.7122659735138367, "learning_rate": 3.183804726958112e-07, "loss": 0.5195, "step": 13959 }, { "epoch": 1.78, "grad_norm": 0.5910205160455563, "learning_rate": 3.180183437350154e-07, "loss": 0.4839, "step": 13960 }, { "epoch": 1.78, "grad_norm": 0.5838329336689242, "learning_rate": 3.1765641407121796e-07, "loss": 0.4125, "step": 13961 }, { "epoch": 1.78, "grad_norm": 0.6265607935285245, "learning_rate": 3.1729468371982374e-07, "loss": 0.473, "step": 13962 }, { "epoch": 1.78, "grad_norm": 0.7794972082057845, "learning_rate": 3.169331526962316e-07, "loss": 0.508, "step": 13963 }, { "epoch": 1.78, "grad_norm": 0.6493473828461548, "learning_rate": 3.1657182101582916e-07, "loss": 0.4308, "step": 13964 }, { "epoch": 1.78, "grad_norm": 0.632608246794238, "learning_rate": 3.162106886939992e-07, "loss": 0.4712, "step": 13965 }, { "epoch": 1.78, "grad_norm": 0.6260838125229987, "learning_rate": 3.1584975574611265e-07, "loss": 0.4607, "step": 13966 }, { "epoch": 1.78, "grad_norm": 0.6947709650184617, "learning_rate": 3.154890221875334e-07, "loss": 0.4572, "step": 13967 }, { "epoch": 1.78, "grad_norm": 0.638385617972717, "learning_rate": 3.1512848803361753e-07, "loss": 0.5104, "step": 13968 }, { "epoch": 1.78, "grad_norm": 0.5881612898015639, "learning_rate": 3.1476815329971046e-07, "loss": 0.4661, "step": 13969 }, { "epoch": 1.78, "grad_norm": 0.6393069392769408, "learning_rate": 3.1440801800115104e-07, "loss": 0.4531, "step": 13970 }, { "epoch": 1.78, "grad_norm": 0.6276554029453595, "learning_rate": 3.140480821532704e-07, "loss": 0.5073, "step": 13971 }, { "epoch": 1.78, "grad_norm": 0.8214294487819478, "learning_rate": 3.1368834577138796e-07, "loss": 0.542, "step": 13972 }, { "epoch": 1.78, "grad_norm": 0.6951376099770419, "learning_rate": 3.133288088708175e-07, "loss": 0.483, "step": 13973 }, { "epoch": 1.78, "grad_norm": 0.5888547602571063, "learning_rate": 3.12969471466863e-07, "loss": 0.464, "step": 13974 }, { "epoch": 1.78, "grad_norm": 0.5884631414217395, "learning_rate": 3.1261033357482153e-07, "loss": 0.5056, "step": 13975 }, { "epoch": 1.78, "grad_norm": 0.7960620925808433, "learning_rate": 3.122513952099782e-07, "loss": 0.4833, "step": 13976 }, { "epoch": 1.78, "grad_norm": 0.6019083685713649, "learning_rate": 3.1189265638761347e-07, "loss": 0.4727, "step": 13977 }, { "epoch": 1.78, "grad_norm": 0.5755717235565269, "learning_rate": 3.1153411712299743e-07, "loss": 0.4591, "step": 13978 }, { "epoch": 1.78, "grad_norm": 0.5659592791419171, "learning_rate": 3.111757774313917e-07, "loss": 0.4151, "step": 13979 }, { "epoch": 1.78, "grad_norm": 0.6188428562546132, "learning_rate": 3.108176373280497e-07, "loss": 0.4629, "step": 13980 }, { "epoch": 1.78, "grad_norm": 0.8234022287641294, "learning_rate": 3.104596968282164e-07, "loss": 0.5493, "step": 13981 }, { "epoch": 1.78, "grad_norm": 0.8248951643773765, "learning_rate": 3.1010195594712747e-07, "loss": 0.5445, "step": 13982 }, { "epoch": 1.78, "grad_norm": 0.9014733865744716, "learning_rate": 3.097444147000128e-07, "loss": 0.5208, "step": 13983 }, { "epoch": 1.78, "grad_norm": 0.6440193376601012, "learning_rate": 3.0938707310208983e-07, "loss": 0.5195, "step": 13984 }, { "epoch": 1.78, "grad_norm": 0.7585046454304213, "learning_rate": 3.090299311685707e-07, "loss": 0.5411, "step": 13985 }, { "epoch": 1.78, "grad_norm": 0.6727946430994536, "learning_rate": 3.086729889146567e-07, "loss": 0.5088, "step": 13986 }, { "epoch": 1.78, "grad_norm": 0.803564451338867, "learning_rate": 3.083162463555422e-07, "loss": 0.594, "step": 13987 }, { "epoch": 1.78, "grad_norm": 0.9274941787571312, "learning_rate": 3.0795970350641246e-07, "loss": 0.5547, "step": 13988 }, { "epoch": 1.78, "grad_norm": 0.6889983649572213, "learning_rate": 3.076033603824452e-07, "loss": 0.4554, "step": 13989 }, { "epoch": 1.78, "grad_norm": 0.706236997409755, "learning_rate": 3.0724721699880777e-07, "loss": 0.4953, "step": 13990 }, { "epoch": 1.78, "grad_norm": 0.5619570316774676, "learning_rate": 3.068912733706608e-07, "loss": 0.4059, "step": 13991 }, { "epoch": 1.78, "grad_norm": 0.7089568697318206, "learning_rate": 3.0653552951315445e-07, "loss": 0.4905, "step": 13992 }, { "epoch": 1.78, "grad_norm": 0.5726343106157612, "learning_rate": 3.0617998544143324e-07, "loss": 0.4588, "step": 13993 }, { "epoch": 1.78, "grad_norm": 0.7408182850817873, "learning_rate": 3.0582464117062893e-07, "loss": 0.477, "step": 13994 }, { "epoch": 1.78, "grad_norm": 0.6636839169118857, "learning_rate": 3.0546949671587113e-07, "loss": 0.4239, "step": 13995 }, { "epoch": 1.78, "grad_norm": 0.6212678454052731, "learning_rate": 3.0511455209227435e-07, "loss": 0.4653, "step": 13996 }, { "epoch": 1.78, "grad_norm": 0.7285276004775854, "learning_rate": 3.047598073149494e-07, "loss": 0.5398, "step": 13997 }, { "epoch": 1.78, "grad_norm": 0.7669782428664222, "learning_rate": 3.044052623989946e-07, "loss": 0.558, "step": 13998 }, { "epoch": 1.78, "grad_norm": 0.819378437879541, "learning_rate": 3.040509173595041e-07, "loss": 0.5321, "step": 13999 }, { "epoch": 1.78, "grad_norm": 1.0679368328327288, "learning_rate": 3.036967722115586e-07, "loss": 0.5724, "step": 14000 }, { "epoch": 1.78, "grad_norm": 0.7800689222021858, "learning_rate": 3.033428269702349e-07, "loss": 0.4811, "step": 14001 }, { "epoch": 1.78, "grad_norm": 0.669661155047453, "learning_rate": 3.0298908165059816e-07, "loss": 0.4947, "step": 14002 }, { "epoch": 1.78, "grad_norm": 0.6831945417206252, "learning_rate": 3.026355362677075e-07, "loss": 0.5268, "step": 14003 }, { "epoch": 1.78, "grad_norm": 0.7674670645864665, "learning_rate": 3.022821908366108e-07, "loss": 0.5056, "step": 14004 }, { "epoch": 1.78, "grad_norm": 0.5893811503207735, "learning_rate": 3.0192904537235e-07, "loss": 0.4578, "step": 14005 }, { "epoch": 1.78, "grad_norm": 0.6660172474523862, "learning_rate": 3.0157609988995583e-07, "loss": 0.4605, "step": 14006 }, { "epoch": 1.78, "grad_norm": 0.7419270024896404, "learning_rate": 3.0122335440445403e-07, "loss": 0.5637, "step": 14007 }, { "epoch": 1.78, "grad_norm": 0.6677475569068402, "learning_rate": 3.0087080893085817e-07, "loss": 0.4761, "step": 14008 }, { "epoch": 1.78, "grad_norm": 0.6410427878515719, "learning_rate": 3.0051846348417677e-07, "loss": 0.4513, "step": 14009 }, { "epoch": 1.78, "grad_norm": 0.6138083224793754, "learning_rate": 3.001663180794062e-07, "loss": 0.475, "step": 14010 }, { "epoch": 1.78, "grad_norm": 0.7377231097816712, "learning_rate": 2.9981437273153727e-07, "loss": 0.5022, "step": 14011 }, { "epoch": 1.79, "grad_norm": 0.5935394125836275, "learning_rate": 2.9946262745555063e-07, "loss": 0.42, "step": 14012 }, { "epoch": 1.79, "grad_norm": 0.6633914134446034, "learning_rate": 2.991110822664195e-07, "loss": 0.5022, "step": 14013 }, { "epoch": 1.79, "grad_norm": 0.7179492815897914, "learning_rate": 2.9875973717910735e-07, "loss": 0.4617, "step": 14014 }, { "epoch": 1.79, "grad_norm": 0.6182179434818086, "learning_rate": 2.9840859220857055e-07, "loss": 0.3975, "step": 14015 }, { "epoch": 1.79, "grad_norm": 0.5743633198300937, "learning_rate": 2.980576473697555e-07, "loss": 0.4532, "step": 14016 }, { "epoch": 1.79, "grad_norm": 0.6996745275555528, "learning_rate": 2.977069026776014e-07, "loss": 0.4492, "step": 14017 }, { "epoch": 1.79, "grad_norm": 0.6228588179086584, "learning_rate": 2.9735635814703733e-07, "loss": 0.4519, "step": 14018 }, { "epoch": 1.79, "grad_norm": 0.6886039783899663, "learning_rate": 2.9700601379298587e-07, "loss": 0.4983, "step": 14019 }, { "epoch": 1.79, "grad_norm": 0.649810538626647, "learning_rate": 2.9665586963036066e-07, "loss": 0.465, "step": 14020 }, { "epoch": 1.79, "grad_norm": 0.7309190870750245, "learning_rate": 2.9630592567406466e-07, "loss": 0.5015, "step": 14021 }, { "epoch": 1.79, "grad_norm": 0.5791065531151323, "learning_rate": 2.95956181938995e-07, "loss": 0.4445, "step": 14022 }, { "epoch": 1.79, "grad_norm": 0.8335746148286484, "learning_rate": 2.9560663844003803e-07, "loss": 0.4839, "step": 14023 }, { "epoch": 1.79, "grad_norm": 0.594735876184739, "learning_rate": 2.9525729519207403e-07, "loss": 0.4593, "step": 14024 }, { "epoch": 1.79, "grad_norm": 0.9468315469606889, "learning_rate": 2.9490815220997227e-07, "loss": 0.486, "step": 14025 }, { "epoch": 1.79, "grad_norm": 0.5520555122175003, "learning_rate": 2.9455920950859527e-07, "loss": 0.4401, "step": 14026 }, { "epoch": 1.79, "grad_norm": 0.5933804684375185, "learning_rate": 2.942104671027962e-07, "loss": 0.4498, "step": 14027 }, { "epoch": 1.79, "grad_norm": 0.5874544601018593, "learning_rate": 2.9386192500741986e-07, "loss": 0.4252, "step": 14028 }, { "epoch": 1.79, "grad_norm": 0.5779599245770167, "learning_rate": 2.935135832373032e-07, "loss": 0.3962, "step": 14029 }, { "epoch": 1.79, "grad_norm": 0.7464183806437213, "learning_rate": 2.931654418072732e-07, "loss": 0.4572, "step": 14030 }, { "epoch": 1.79, "grad_norm": 1.2228171080049568, "learning_rate": 2.928175007321493e-07, "loss": 0.5534, "step": 14031 }, { "epoch": 1.79, "grad_norm": 0.7557057056985812, "learning_rate": 2.924697600267429e-07, "loss": 0.5214, "step": 14032 }, { "epoch": 1.79, "grad_norm": 0.7558367511593611, "learning_rate": 2.9212221970585484e-07, "loss": 0.4873, "step": 14033 }, { "epoch": 1.79, "grad_norm": 0.5893683884252195, "learning_rate": 2.9177487978428066e-07, "loss": 0.4727, "step": 14034 }, { "epoch": 1.79, "grad_norm": 0.840817529639047, "learning_rate": 2.91427740276804e-07, "loss": 0.554, "step": 14035 }, { "epoch": 1.79, "grad_norm": 0.7092683339300453, "learning_rate": 2.9108080119820246e-07, "loss": 0.4691, "step": 14036 }, { "epoch": 1.79, "grad_norm": 0.6562113094146875, "learning_rate": 2.907340625632432e-07, "loss": 0.4466, "step": 14037 }, { "epoch": 1.79, "grad_norm": 0.7655668962938464, "learning_rate": 2.903875243866866e-07, "loss": 0.5577, "step": 14038 }, { "epoch": 1.79, "grad_norm": 0.8720225303315561, "learning_rate": 2.900411866832825e-07, "loss": 0.542, "step": 14039 }, { "epoch": 1.79, "grad_norm": 0.9663445241833593, "learning_rate": 2.8969504946777526e-07, "loss": 0.4829, "step": 14040 }, { "epoch": 1.79, "grad_norm": 0.5830415124395745, "learning_rate": 2.89349112754897e-07, "loss": 0.4826, "step": 14041 }, { "epoch": 1.79, "grad_norm": 0.7815344051249842, "learning_rate": 2.890033765593747e-07, "loss": 0.5554, "step": 14042 }, { "epoch": 1.79, "grad_norm": 0.7299779420743207, "learning_rate": 2.8865784089592283e-07, "loss": 0.4903, "step": 14043 }, { "epoch": 1.79, "grad_norm": 0.6588200861813663, "learning_rate": 2.8831250577925295e-07, "loss": 0.4883, "step": 14044 }, { "epoch": 1.79, "grad_norm": 0.671356015042125, "learning_rate": 2.879673712240627e-07, "loss": 0.4943, "step": 14045 }, { "epoch": 1.79, "grad_norm": 0.6535884139094523, "learning_rate": 2.876224372450442e-07, "loss": 0.5036, "step": 14046 }, { "epoch": 1.79, "grad_norm": 0.758461525257078, "learning_rate": 2.872777038568797e-07, "loss": 0.5505, "step": 14047 }, { "epoch": 1.79, "grad_norm": 0.7885476357908716, "learning_rate": 2.8693317107424456e-07, "loss": 0.5791, "step": 14048 }, { "epoch": 1.79, "grad_norm": 0.7907196943945121, "learning_rate": 2.865888389118021e-07, "loss": 0.51, "step": 14049 }, { "epoch": 1.79, "grad_norm": 0.5900772149835349, "learning_rate": 2.8624470738421227e-07, "loss": 0.4474, "step": 14050 }, { "epoch": 1.79, "grad_norm": 0.7313398985403912, "learning_rate": 2.8590077650612113e-07, "loss": 0.4758, "step": 14051 }, { "epoch": 1.79, "grad_norm": 0.6059662291896458, "learning_rate": 2.855570462921703e-07, "loss": 0.4134, "step": 14052 }, { "epoch": 1.79, "grad_norm": 0.572947223531298, "learning_rate": 2.8521351675699027e-07, "loss": 0.4284, "step": 14053 }, { "epoch": 1.79, "grad_norm": 0.6055596510994963, "learning_rate": 2.8487018791520547e-07, "loss": 0.4287, "step": 14054 }, { "epoch": 1.79, "grad_norm": 0.6085240897143417, "learning_rate": 2.845270597814287e-07, "loss": 0.4917, "step": 14055 }, { "epoch": 1.79, "grad_norm": 0.6688455300730809, "learning_rate": 2.8418413237026653e-07, "loss": 0.4704, "step": 14056 }, { "epoch": 1.79, "grad_norm": 0.5967554391684378, "learning_rate": 2.838414056963157e-07, "loss": 0.4937, "step": 14057 }, { "epoch": 1.79, "grad_norm": 0.873230117461126, "learning_rate": 2.834988797741667e-07, "loss": 0.4787, "step": 14058 }, { "epoch": 1.79, "grad_norm": 0.6529949250030135, "learning_rate": 2.8315655461839784e-07, "loss": 0.4567, "step": 14059 }, { "epoch": 1.79, "grad_norm": 0.6099392927158267, "learning_rate": 2.82814430243582e-07, "loss": 0.4599, "step": 14060 }, { "epoch": 1.79, "grad_norm": 0.6804697959064162, "learning_rate": 2.8247250666428194e-07, "loss": 0.4521, "step": 14061 }, { "epoch": 1.79, "grad_norm": 0.8427440618605094, "learning_rate": 2.821307838950521e-07, "loss": 0.5176, "step": 14062 }, { "epoch": 1.79, "grad_norm": 0.6586437643507201, "learning_rate": 2.817892619504381e-07, "loss": 0.4959, "step": 14063 }, { "epoch": 1.79, "grad_norm": 0.6046241518452242, "learning_rate": 2.814479408449794e-07, "loss": 0.451, "step": 14064 }, { "epoch": 1.79, "grad_norm": 0.6286397223082102, "learning_rate": 2.8110682059320217e-07, "loss": 0.4293, "step": 14065 }, { "epoch": 1.79, "grad_norm": 0.6176440910142718, "learning_rate": 2.8076590120962864e-07, "loss": 0.4375, "step": 14066 }, { "epoch": 1.79, "grad_norm": 0.5953944316055307, "learning_rate": 2.8042518270877004e-07, "loss": 0.453, "step": 14067 }, { "epoch": 1.79, "grad_norm": 0.7992609498644916, "learning_rate": 2.800846651051292e-07, "loss": 0.4615, "step": 14068 }, { "epoch": 1.79, "grad_norm": 0.6937118233517757, "learning_rate": 2.7974434841320177e-07, "loss": 0.5198, "step": 14069 }, { "epoch": 1.79, "grad_norm": 0.6689864153452699, "learning_rate": 2.794042326474744e-07, "loss": 0.4858, "step": 14070 }, { "epoch": 1.79, "grad_norm": 0.5845665730478995, "learning_rate": 2.7906431782242336e-07, "loss": 0.4551, "step": 14071 }, { "epoch": 1.79, "grad_norm": 0.7070242436655336, "learning_rate": 2.787246039525193e-07, "loss": 0.4999, "step": 14072 }, { "epoch": 1.79, "grad_norm": 0.6345444768359401, "learning_rate": 2.7838509105222067e-07, "loss": 0.4275, "step": 14073 }, { "epoch": 1.79, "grad_norm": 0.9464502044934615, "learning_rate": 2.780457791359814e-07, "loss": 0.5652, "step": 14074 }, { "epoch": 1.79, "grad_norm": 0.8448551074620788, "learning_rate": 2.7770666821824333e-07, "loss": 0.538, "step": 14075 }, { "epoch": 1.79, "grad_norm": 0.685009793129534, "learning_rate": 2.773677583134432e-07, "loss": 0.5329, "step": 14076 }, { "epoch": 1.79, "grad_norm": 0.7454657729409372, "learning_rate": 2.7702904943600505e-07, "loss": 0.5152, "step": 14077 }, { "epoch": 1.79, "grad_norm": 1.5647278390103005, "learning_rate": 2.7669054160034845e-07, "loss": 0.5328, "step": 14078 }, { "epoch": 1.79, "grad_norm": 0.7132633719685737, "learning_rate": 2.7635223482088193e-07, "loss": 0.4969, "step": 14079 }, { "epoch": 1.79, "grad_norm": 0.8106463148157628, "learning_rate": 2.7601412911200554e-07, "loss": 0.6198, "step": 14080 }, { "epoch": 1.79, "grad_norm": 0.7482767129558527, "learning_rate": 2.756762244881128e-07, "loss": 0.5192, "step": 14081 }, { "epoch": 1.79, "grad_norm": 0.784367554973596, "learning_rate": 2.753385209635856e-07, "loss": 0.5309, "step": 14082 }, { "epoch": 1.79, "grad_norm": 0.7266783280783008, "learning_rate": 2.750010185528007e-07, "loss": 0.5556, "step": 14083 }, { "epoch": 1.79, "grad_norm": 0.7489772475972996, "learning_rate": 2.7466371727012276e-07, "loss": 0.5421, "step": 14084 }, { "epoch": 1.79, "grad_norm": 0.652354796218473, "learning_rate": 2.743266171299108e-07, "loss": 0.4804, "step": 14085 }, { "epoch": 1.79, "grad_norm": 0.7200402248185364, "learning_rate": 2.739897181465134e-07, "loss": 0.4992, "step": 14086 }, { "epoch": 1.79, "grad_norm": 0.623852013097195, "learning_rate": 2.736530203342719e-07, "loss": 0.4944, "step": 14087 }, { "epoch": 1.79, "grad_norm": 0.6779170197908893, "learning_rate": 2.7331652370751746e-07, "loss": 0.4493, "step": 14088 }, { "epoch": 1.79, "grad_norm": 0.6530279146631794, "learning_rate": 2.7298022828057493e-07, "loss": 0.4386, "step": 14089 }, { "epoch": 1.8, "grad_norm": 0.8709978422152168, "learning_rate": 2.726441340677577e-07, "loss": 0.4438, "step": 14090 }, { "epoch": 1.8, "grad_norm": 0.6681389279881853, "learning_rate": 2.7230824108337383e-07, "loss": 0.4518, "step": 14091 }, { "epoch": 1.8, "grad_norm": 0.7098123997845902, "learning_rate": 2.719725493417208e-07, "loss": 0.5652, "step": 14092 }, { "epoch": 1.8, "grad_norm": 0.7717801313587913, "learning_rate": 2.716370588570871e-07, "loss": 0.5487, "step": 14093 }, { "epoch": 1.8, "grad_norm": 0.7587300833457639, "learning_rate": 2.713017696437542e-07, "loss": 0.5346, "step": 14094 }, { "epoch": 1.8, "grad_norm": 0.5545966219464539, "learning_rate": 2.7096668171599507e-07, "loss": 0.4081, "step": 14095 }, { "epoch": 1.8, "grad_norm": 0.5899611777348205, "learning_rate": 2.7063179508807156e-07, "loss": 0.4429, "step": 14096 }, { "epoch": 1.8, "grad_norm": 0.6032067040084836, "learning_rate": 2.702971097742407e-07, "loss": 0.4537, "step": 14097 }, { "epoch": 1.8, "grad_norm": 0.5622890334649832, "learning_rate": 2.6996262578874666e-07, "loss": 0.4666, "step": 14098 }, { "epoch": 1.8, "grad_norm": 0.7250193397069739, "learning_rate": 2.696283431458296e-07, "loss": 0.566, "step": 14099 }, { "epoch": 1.8, "grad_norm": 0.7667273708490436, "learning_rate": 2.692942618597172e-07, "loss": 0.5649, "step": 14100 }, { "epoch": 1.8, "grad_norm": 1.1139490509777086, "learning_rate": 2.689603819446318e-07, "loss": 0.5136, "step": 14101 }, { "epoch": 1.8, "grad_norm": 0.6040306623828826, "learning_rate": 2.686267034147838e-07, "loss": 0.4502, "step": 14102 }, { "epoch": 1.8, "grad_norm": 0.7582261999695908, "learning_rate": 2.682932262843785e-07, "loss": 0.4291, "step": 14103 }, { "epoch": 1.8, "grad_norm": 0.6921974248929327, "learning_rate": 2.679599505676089e-07, "loss": 0.5007, "step": 14104 }, { "epoch": 1.8, "grad_norm": 0.6478401908614541, "learning_rate": 2.676268762786632e-07, "loss": 0.4535, "step": 14105 }, { "epoch": 1.8, "grad_norm": 0.6845098462955732, "learning_rate": 2.6729400343171895e-07, "loss": 0.4192, "step": 14106 }, { "epoch": 1.8, "grad_norm": 0.5924978346811467, "learning_rate": 2.6696133204094644e-07, "loss": 0.4569, "step": 14107 }, { "epoch": 1.8, "grad_norm": 0.6002734319543909, "learning_rate": 2.666288621205043e-07, "loss": 0.4227, "step": 14108 }, { "epoch": 1.8, "grad_norm": 0.5908548380285544, "learning_rate": 2.662965936845463e-07, "loss": 0.4314, "step": 14109 }, { "epoch": 1.8, "grad_norm": 0.6712863397383674, "learning_rate": 2.6596452674721497e-07, "loss": 0.4661, "step": 14110 }, { "epoch": 1.8, "grad_norm": 0.6704318657941968, "learning_rate": 2.6563266132264676e-07, "loss": 0.431, "step": 14111 }, { "epoch": 1.8, "grad_norm": 0.7483333427620246, "learning_rate": 2.6530099742496653e-07, "loss": 0.5019, "step": 14112 }, { "epoch": 1.8, "grad_norm": 0.7252154876987192, "learning_rate": 2.64969535068294e-07, "loss": 0.4598, "step": 14113 }, { "epoch": 1.8, "grad_norm": 0.6531387365096899, "learning_rate": 2.646382742667364e-07, "loss": 0.4849, "step": 14114 }, { "epoch": 1.8, "grad_norm": 0.6926964561980297, "learning_rate": 2.643072150343956e-07, "loss": 0.4824, "step": 14115 }, { "epoch": 1.8, "grad_norm": 0.648668250117498, "learning_rate": 2.639763573853632e-07, "loss": 0.4786, "step": 14116 }, { "epoch": 1.8, "grad_norm": 0.673469286592259, "learning_rate": 2.63645701333724e-07, "loss": 0.511, "step": 14117 }, { "epoch": 1.8, "grad_norm": 0.7818103331676541, "learning_rate": 2.633152468935507e-07, "loss": 0.5506, "step": 14118 }, { "epoch": 1.8, "grad_norm": 0.8796088964477371, "learning_rate": 2.6298499407891255e-07, "loss": 0.4942, "step": 14119 }, { "epoch": 1.8, "grad_norm": 0.7045766570497353, "learning_rate": 2.626549429038644e-07, "loss": 0.4946, "step": 14120 }, { "epoch": 1.8, "grad_norm": 0.7044183378852429, "learning_rate": 2.623250933824584e-07, "loss": 0.5107, "step": 14121 }, { "epoch": 1.8, "grad_norm": 0.6796896783801714, "learning_rate": 2.6199544552873267e-07, "loss": 0.4533, "step": 14122 }, { "epoch": 1.8, "grad_norm": 0.7279685941280012, "learning_rate": 2.616659993567211e-07, "loss": 0.4805, "step": 14123 }, { "epoch": 1.8, "grad_norm": 0.6419932546666827, "learning_rate": 2.6133675488044576e-07, "loss": 0.5265, "step": 14124 }, { "epoch": 1.8, "grad_norm": 0.8214619988274724, "learning_rate": 2.6100771211392317e-07, "loss": 0.5514, "step": 14125 }, { "epoch": 1.8, "grad_norm": 0.6716300731459337, "learning_rate": 2.606788710711572e-07, "loss": 0.472, "step": 14126 }, { "epoch": 1.8, "grad_norm": 0.6313143918126737, "learning_rate": 2.6035023176614825e-07, "loss": 0.4301, "step": 14127 }, { "epoch": 1.8, "grad_norm": 0.5611973907331589, "learning_rate": 2.6002179421288354e-07, "loss": 0.4325, "step": 14128 }, { "epoch": 1.8, "grad_norm": 0.6061318575586702, "learning_rate": 2.596935584253446e-07, "loss": 0.4881, "step": 14129 }, { "epoch": 1.8, "grad_norm": 0.6535731850498583, "learning_rate": 2.593655244175014e-07, "loss": 0.4459, "step": 14130 }, { "epoch": 1.8, "grad_norm": 0.811633641447587, "learning_rate": 2.590376922033211e-07, "loss": 0.5879, "step": 14131 }, { "epoch": 1.8, "grad_norm": 0.8035878315640297, "learning_rate": 2.5871006179675474e-07, "loss": 0.5354, "step": 14132 }, { "epoch": 1.8, "grad_norm": 0.7435357962086498, "learning_rate": 2.583826332117512e-07, "loss": 0.5241, "step": 14133 }, { "epoch": 1.8, "grad_norm": 0.9266090377965354, "learning_rate": 2.5805540646224645e-07, "loss": 0.4439, "step": 14134 }, { "epoch": 1.8, "grad_norm": 0.6287928295349193, "learning_rate": 2.577283815621706e-07, "loss": 0.4446, "step": 14135 }, { "epoch": 1.8, "grad_norm": 0.6734797581806532, "learning_rate": 2.5740155852544244e-07, "loss": 0.5079, "step": 14136 }, { "epoch": 1.8, "grad_norm": 0.9085841776836848, "learning_rate": 2.5707493736597533e-07, "loss": 0.4981, "step": 14137 }, { "epoch": 1.8, "grad_norm": 0.5868724158343713, "learning_rate": 2.567485180976714e-07, "loss": 0.4584, "step": 14138 }, { "epoch": 1.8, "grad_norm": 0.7160491831044241, "learning_rate": 2.564223007344263e-07, "loss": 0.5079, "step": 14139 }, { "epoch": 1.8, "grad_norm": 0.629891997597414, "learning_rate": 2.5609628529012556e-07, "loss": 0.5082, "step": 14140 }, { "epoch": 1.8, "grad_norm": 0.7321744620782598, "learning_rate": 2.5577047177864635e-07, "loss": 0.4806, "step": 14141 }, { "epoch": 1.8, "grad_norm": 0.6244628754490954, "learning_rate": 2.554448602138576e-07, "loss": 0.452, "step": 14142 }, { "epoch": 1.8, "grad_norm": 0.7834523062474261, "learning_rate": 2.5511945060962e-07, "loss": 0.5274, "step": 14143 }, { "epoch": 1.8, "grad_norm": 0.5856453441275623, "learning_rate": 2.5479424297978504e-07, "loss": 0.4809, "step": 14144 }, { "epoch": 1.8, "grad_norm": 0.5640779701611198, "learning_rate": 2.544692373381952e-07, "loss": 0.4701, "step": 14145 }, { "epoch": 1.8, "grad_norm": 0.6798381770121467, "learning_rate": 2.541444336986859e-07, "loss": 0.4591, "step": 14146 }, { "epoch": 1.8, "grad_norm": 0.5545638834611163, "learning_rate": 2.5381983207508233e-07, "loss": 0.4147, "step": 14147 }, { "epoch": 1.8, "grad_norm": 0.6093238783483694, "learning_rate": 2.534954324812022e-07, "loss": 0.4182, "step": 14148 }, { "epoch": 1.8, "grad_norm": 0.6969751259685462, "learning_rate": 2.5317123493085296e-07, "loss": 0.504, "step": 14149 }, { "epoch": 1.8, "grad_norm": 0.738101972630774, "learning_rate": 2.528472394378362e-07, "loss": 0.5398, "step": 14150 }, { "epoch": 1.8, "grad_norm": 0.6946071513650258, "learning_rate": 2.525234460159426e-07, "loss": 0.5356, "step": 14151 }, { "epoch": 1.8, "grad_norm": 0.7972383022895427, "learning_rate": 2.5219985467895445e-07, "loss": 0.5294, "step": 14152 }, { "epoch": 1.8, "grad_norm": 0.7471605575148224, "learning_rate": 2.51876465440648e-07, "loss": 0.5331, "step": 14153 }, { "epoch": 1.8, "grad_norm": 0.6961535416207767, "learning_rate": 2.515532783147867e-07, "loss": 0.5203, "step": 14154 }, { "epoch": 1.8, "grad_norm": 0.7212796545384271, "learning_rate": 2.512302933151284e-07, "loss": 0.5048, "step": 14155 }, { "epoch": 1.8, "grad_norm": 0.6752782223845183, "learning_rate": 2.5090751045542207e-07, "loss": 0.5375, "step": 14156 }, { "epoch": 1.8, "grad_norm": 0.7584000754524199, "learning_rate": 2.5058492974940673e-07, "loss": 0.5121, "step": 14157 }, { "epoch": 1.8, "grad_norm": 0.7520265050530036, "learning_rate": 2.5026255121081476e-07, "loss": 0.5215, "step": 14158 }, { "epoch": 1.8, "grad_norm": 0.7427658820386246, "learning_rate": 2.4994037485336685e-07, "loss": 0.5075, "step": 14159 }, { "epoch": 1.8, "grad_norm": 0.7978715110972681, "learning_rate": 2.4961840069077924e-07, "loss": 0.5287, "step": 14160 }, { "epoch": 1.8, "grad_norm": 0.6556385972980991, "learning_rate": 2.4929662873675544e-07, "loss": 0.467, "step": 14161 }, { "epoch": 1.8, "grad_norm": 0.7104151147077504, "learning_rate": 2.4897505900499386e-07, "loss": 0.4714, "step": 14162 }, { "epoch": 1.8, "grad_norm": 0.5939300516059455, "learning_rate": 2.486536915091808e-07, "loss": 0.4668, "step": 14163 }, { "epoch": 1.8, "grad_norm": 1.6790507804263248, "learning_rate": 2.483325262629982e-07, "loss": 0.5853, "step": 14164 }, { "epoch": 1.8, "grad_norm": 0.7865005529860053, "learning_rate": 2.48011563280115e-07, "loss": 0.5276, "step": 14165 }, { "epoch": 1.8, "grad_norm": 0.6422133243850308, "learning_rate": 2.4769080257419475e-07, "loss": 0.4197, "step": 14166 }, { "epoch": 1.8, "grad_norm": 0.5839343444576225, "learning_rate": 2.473702441588899e-07, "loss": 0.418, "step": 14167 }, { "epoch": 1.8, "grad_norm": 0.6602772753883773, "learning_rate": 2.470498880478478e-07, "loss": 0.4228, "step": 14168 }, { "epoch": 1.81, "grad_norm": 0.6581278743133802, "learning_rate": 2.4672973425470257e-07, "loss": 0.5049, "step": 14169 }, { "epoch": 1.81, "grad_norm": 0.8910652584601624, "learning_rate": 2.4640978279308446e-07, "loss": 0.5133, "step": 14170 }, { "epoch": 1.81, "grad_norm": 0.6637104040419769, "learning_rate": 2.460900336766109e-07, "loss": 0.4902, "step": 14171 }, { "epoch": 1.81, "grad_norm": 0.6810286285552736, "learning_rate": 2.457704869188937e-07, "loss": 0.4816, "step": 14172 }, { "epoch": 1.81, "grad_norm": 0.6016357085625654, "learning_rate": 2.4545114253353375e-07, "loss": 0.4554, "step": 14173 }, { "epoch": 1.81, "grad_norm": 0.6633312971221631, "learning_rate": 2.4513200053412625e-07, "loss": 0.4389, "step": 14174 }, { "epoch": 1.81, "grad_norm": 0.6490137375327155, "learning_rate": 2.4481306093425417e-07, "loss": 0.4903, "step": 14175 }, { "epoch": 1.81, "grad_norm": 0.6546991069502708, "learning_rate": 2.444943237474956e-07, "loss": 0.5127, "step": 14176 }, { "epoch": 1.81, "grad_norm": 0.7738135461862254, "learning_rate": 2.4417578898741635e-07, "loss": 0.5259, "step": 14177 }, { "epoch": 1.81, "grad_norm": 0.5983685960497321, "learning_rate": 2.438574566675772e-07, "loss": 0.3993, "step": 14178 }, { "epoch": 1.81, "grad_norm": 0.6728704214806484, "learning_rate": 2.435393268015274e-07, "loss": 0.4814, "step": 14179 }, { "epoch": 1.81, "grad_norm": 0.7666093986737927, "learning_rate": 2.432213994028082e-07, "loss": 0.4866, "step": 14180 }, { "epoch": 1.81, "grad_norm": 0.583214959649848, "learning_rate": 2.4290367448495386e-07, "loss": 0.4537, "step": 14181 }, { "epoch": 1.81, "grad_norm": 0.634754134592451, "learning_rate": 2.4258615206148916e-07, "loss": 0.5471, "step": 14182 }, { "epoch": 1.81, "grad_norm": 0.7938260868854734, "learning_rate": 2.422688321459288e-07, "loss": 0.5718, "step": 14183 }, { "epoch": 1.81, "grad_norm": 1.0925770666831487, "learning_rate": 2.419517147517819e-07, "loss": 0.4186, "step": 14184 }, { "epoch": 1.81, "grad_norm": 0.6197423048923556, "learning_rate": 2.4163479989254447e-07, "loss": 0.4508, "step": 14185 }, { "epoch": 1.81, "grad_norm": 0.6748030375728935, "learning_rate": 2.413180875817095e-07, "loss": 0.4337, "step": 14186 }, { "epoch": 1.81, "grad_norm": 0.7609903508708606, "learning_rate": 2.410015778327557e-07, "loss": 0.4469, "step": 14187 }, { "epoch": 1.81, "grad_norm": 0.7823654364909509, "learning_rate": 2.406852706591578e-07, "loss": 0.5262, "step": 14188 }, { "epoch": 1.81, "grad_norm": 0.8580675010258191, "learning_rate": 2.40369166074379e-07, "loss": 0.4782, "step": 14189 }, { "epoch": 1.81, "grad_norm": 0.6957684266554817, "learning_rate": 2.4005326409187567e-07, "loss": 0.4623, "step": 14190 }, { "epoch": 1.81, "grad_norm": 0.8943558654530902, "learning_rate": 2.3973756472509377e-07, "loss": 0.5052, "step": 14191 }, { "epoch": 1.81, "grad_norm": 0.7620042119096869, "learning_rate": 2.39422067987472e-07, "loss": 0.5197, "step": 14192 }, { "epoch": 1.81, "grad_norm": 0.807499002743479, "learning_rate": 2.391067738924402e-07, "loss": 0.5033, "step": 14193 }, { "epoch": 1.81, "grad_norm": 0.8227386023367806, "learning_rate": 2.3879168245342033e-07, "loss": 0.5204, "step": 14194 }, { "epoch": 1.81, "grad_norm": 0.5608111445297024, "learning_rate": 2.384767936838228e-07, "loss": 0.4543, "step": 14195 }, { "epoch": 1.81, "grad_norm": 0.7579136924495227, "learning_rate": 2.381621075970536e-07, "loss": 0.5764, "step": 14196 }, { "epoch": 1.81, "grad_norm": 0.7939427611548193, "learning_rate": 2.3784762420650588e-07, "loss": 0.5185, "step": 14197 }, { "epoch": 1.81, "grad_norm": 0.7619598395762461, "learning_rate": 2.3753334352556778e-07, "loss": 0.4983, "step": 14198 }, { "epoch": 1.81, "grad_norm": 0.6945604788539401, "learning_rate": 2.3721926556761587e-07, "loss": 0.532, "step": 14199 }, { "epoch": 1.81, "grad_norm": 0.885113304364197, "learning_rate": 2.369053903460211e-07, "loss": 0.5237, "step": 14200 }, { "epoch": 1.81, "grad_norm": 0.7462394685130739, "learning_rate": 2.3659171787414225e-07, "loss": 0.4768, "step": 14201 }, { "epoch": 1.81, "grad_norm": 0.6558374130082962, "learning_rate": 2.3627824816533307e-07, "loss": 0.4496, "step": 14202 }, { "epoch": 1.81, "grad_norm": 0.6702576798772589, "learning_rate": 2.3596498123293509e-07, "loss": 0.4258, "step": 14203 }, { "epoch": 1.81, "grad_norm": 0.6740631233704165, "learning_rate": 2.3565191709028489e-07, "loss": 0.4566, "step": 14204 }, { "epoch": 1.81, "grad_norm": 0.5846829182031886, "learning_rate": 2.353390557507079e-07, "loss": 0.437, "step": 14205 }, { "epoch": 1.81, "grad_norm": 0.5916320483764909, "learning_rate": 2.350263972275213e-07, "loss": 0.4743, "step": 14206 }, { "epoch": 1.81, "grad_norm": 0.6408609731013256, "learning_rate": 2.3471394153403547e-07, "loss": 0.4594, "step": 14207 }, { "epoch": 1.81, "grad_norm": 0.672894210037587, "learning_rate": 2.3440168868354817e-07, "loss": 0.4835, "step": 14208 }, { "epoch": 1.81, "grad_norm": 0.7425369044244207, "learning_rate": 2.340896386893532e-07, "loss": 0.4996, "step": 14209 }, { "epoch": 1.81, "grad_norm": 0.7406798141093591, "learning_rate": 2.3377779156473158e-07, "loss": 0.4966, "step": 14210 }, { "epoch": 1.81, "grad_norm": 0.6640633613291672, "learning_rate": 2.334661473229599e-07, "loss": 0.4534, "step": 14211 }, { "epoch": 1.81, "grad_norm": 0.6537995160561222, "learning_rate": 2.331547059773015e-07, "loss": 0.4578, "step": 14212 }, { "epoch": 1.81, "grad_norm": 0.7917700017339283, "learning_rate": 2.3284346754101574e-07, "loss": 0.5025, "step": 14213 }, { "epoch": 1.81, "grad_norm": 0.7423703768882685, "learning_rate": 2.325324320273492e-07, "loss": 0.502, "step": 14214 }, { "epoch": 1.81, "grad_norm": 0.820223889368346, "learning_rate": 2.3222159944954193e-07, "loss": 0.5087, "step": 14215 }, { "epoch": 1.81, "grad_norm": 0.7457905455285212, "learning_rate": 2.319109698208266e-07, "loss": 0.5068, "step": 14216 }, { "epoch": 1.81, "grad_norm": 0.6647725342622662, "learning_rate": 2.3160054315442381e-07, "loss": 0.4284, "step": 14217 }, { "epoch": 1.81, "grad_norm": 0.7970911902013099, "learning_rate": 2.3129031946354852e-07, "loss": 0.4313, "step": 14218 }, { "epoch": 1.81, "grad_norm": 0.6573266804737431, "learning_rate": 2.3098029876140627e-07, "loss": 0.459, "step": 14219 }, { "epoch": 1.81, "grad_norm": 0.582578886990686, "learning_rate": 2.306704810611926e-07, "loss": 0.4443, "step": 14220 }, { "epoch": 1.81, "grad_norm": 0.6904661306524759, "learning_rate": 2.3036086637609644e-07, "loss": 0.503, "step": 14221 }, { "epoch": 1.81, "grad_norm": 0.7600066241849824, "learning_rate": 2.3005145471929557e-07, "loss": 0.5218, "step": 14222 }, { "epoch": 1.81, "grad_norm": 0.7246213358428931, "learning_rate": 2.297422461039628e-07, "loss": 0.4694, "step": 14223 }, { "epoch": 1.81, "grad_norm": 0.7357316511300931, "learning_rate": 2.2943324054325812e-07, "loss": 0.4572, "step": 14224 }, { "epoch": 1.81, "grad_norm": 0.579002256008786, "learning_rate": 2.2912443805033714e-07, "loss": 0.4129, "step": 14225 }, { "epoch": 1.81, "grad_norm": 0.6264792471272272, "learning_rate": 2.288158386383421e-07, "loss": 0.4609, "step": 14226 }, { "epoch": 1.81, "grad_norm": 0.8143620046094783, "learning_rate": 2.285074423204109e-07, "loss": 0.5542, "step": 14227 }, { "epoch": 1.81, "grad_norm": 0.674348489372463, "learning_rate": 2.281992491096696e-07, "loss": 0.4377, "step": 14228 }, { "epoch": 1.81, "grad_norm": 0.6615199179737755, "learning_rate": 2.2789125901923837e-07, "loss": 0.4562, "step": 14229 }, { "epoch": 1.81, "grad_norm": 0.6976582241050214, "learning_rate": 2.2758347206222608e-07, "loss": 0.467, "step": 14230 }, { "epoch": 1.81, "grad_norm": 0.6108210703395368, "learning_rate": 2.272758882517362e-07, "loss": 0.4224, "step": 14231 }, { "epoch": 1.81, "grad_norm": 0.6508508067789405, "learning_rate": 2.2696850760085932e-07, "loss": 0.499, "step": 14232 }, { "epoch": 1.81, "grad_norm": 0.6747220968596861, "learning_rate": 2.266613301226811e-07, "loss": 0.4804, "step": 14233 }, { "epoch": 1.81, "grad_norm": 0.7298772013519176, "learning_rate": 2.263543558302761e-07, "loss": 0.4549, "step": 14234 }, { "epoch": 1.81, "grad_norm": 0.6606278211989465, "learning_rate": 2.2604758473671274e-07, "loss": 0.4332, "step": 14235 }, { "epoch": 1.81, "grad_norm": 0.6223102378118658, "learning_rate": 2.257410168550478e-07, "loss": 0.4537, "step": 14236 }, { "epoch": 1.81, "grad_norm": 0.6938975699707559, "learning_rate": 2.2543465219833138e-07, "loss": 0.4355, "step": 14237 }, { "epoch": 1.81, "grad_norm": 0.6229346998626447, "learning_rate": 2.2512849077960418e-07, "loss": 0.4295, "step": 14238 }, { "epoch": 1.81, "grad_norm": 0.7828038592950917, "learning_rate": 2.2482253261189967e-07, "loss": 0.4886, "step": 14239 }, { "epoch": 1.81, "grad_norm": 0.7449480388193708, "learning_rate": 2.2451677770823965e-07, "loss": 0.5145, "step": 14240 }, { "epoch": 1.81, "grad_norm": 0.7140851028904573, "learning_rate": 2.2421122608164148e-07, "loss": 0.5245, "step": 14241 }, { "epoch": 1.81, "grad_norm": 0.7986856240856315, "learning_rate": 2.2390587774510807e-07, "loss": 0.5514, "step": 14242 }, { "epoch": 1.81, "grad_norm": 0.8073235083730801, "learning_rate": 2.2360073271164074e-07, "loss": 0.4825, "step": 14243 }, { "epoch": 1.81, "grad_norm": 0.6398205034350987, "learning_rate": 2.2329579099422683e-07, "loss": 0.4994, "step": 14244 }, { "epoch": 1.81, "grad_norm": 0.7252708217524559, "learning_rate": 2.2299105260584708e-07, "loss": 0.5068, "step": 14245 }, { "epoch": 1.81, "grad_norm": 0.6676568311004015, "learning_rate": 2.226865175594728e-07, "loss": 0.4154, "step": 14246 }, { "epoch": 1.82, "grad_norm": 0.5742942987205643, "learning_rate": 2.2238218586806747e-07, "loss": 0.4554, "step": 14247 }, { "epoch": 1.82, "grad_norm": 0.6451532244115663, "learning_rate": 2.2207805754458523e-07, "loss": 0.498, "step": 14248 }, { "epoch": 1.82, "grad_norm": 0.7647587721599363, "learning_rate": 2.2177413260197288e-07, "loss": 0.4884, "step": 14249 }, { "epoch": 1.82, "grad_norm": 0.7121549986946512, "learning_rate": 2.2147041105316568e-07, "loss": 0.4924, "step": 14250 }, { "epoch": 1.82, "grad_norm": 0.5571829668166154, "learning_rate": 2.2116689291109327e-07, "loss": 0.4294, "step": 14251 }, { "epoch": 1.82, "grad_norm": 0.6303432724226692, "learning_rate": 2.208635781886753e-07, "loss": 0.4611, "step": 14252 }, { "epoch": 1.82, "grad_norm": 1.5081420988030918, "learning_rate": 2.2056046689882314e-07, "loss": 0.4946, "step": 14253 }, { "epoch": 1.82, "grad_norm": 0.7174590291029483, "learning_rate": 2.2025755905443812e-07, "loss": 0.51, "step": 14254 }, { "epoch": 1.82, "grad_norm": 0.8221449684588793, "learning_rate": 2.1995485466841605e-07, "loss": 0.558, "step": 14255 }, { "epoch": 1.82, "grad_norm": 0.8045615013856121, "learning_rate": 2.1965235375364047e-07, "loss": 0.4708, "step": 14256 }, { "epoch": 1.82, "grad_norm": 0.561815700041603, "learning_rate": 2.1935005632298833e-07, "loss": 0.4252, "step": 14257 }, { "epoch": 1.82, "grad_norm": 0.5586456824125487, "learning_rate": 2.190479623893277e-07, "loss": 0.4725, "step": 14258 }, { "epoch": 1.82, "grad_norm": 0.6185182469873587, "learning_rate": 2.1874607196551767e-07, "loss": 0.4582, "step": 14259 }, { "epoch": 1.82, "grad_norm": 0.6465197699176274, "learning_rate": 2.1844438506440856e-07, "loss": 0.4623, "step": 14260 }, { "epoch": 1.82, "grad_norm": 0.7453694564678071, "learning_rate": 2.1814290169884234e-07, "loss": 0.5128, "step": 14261 }, { "epoch": 1.82, "grad_norm": 0.7914320281375465, "learning_rate": 2.1784162188165203e-07, "loss": 0.5349, "step": 14262 }, { "epoch": 1.82, "grad_norm": 0.7343057723752, "learning_rate": 2.1754054562566241e-07, "loss": 0.5068, "step": 14263 }, { "epoch": 1.82, "grad_norm": 0.6749100067655267, "learning_rate": 2.172396729436893e-07, "loss": 0.4846, "step": 14264 }, { "epoch": 1.82, "grad_norm": 0.6666345933485128, "learning_rate": 2.1693900384853973e-07, "loss": 0.5019, "step": 14265 }, { "epoch": 1.82, "grad_norm": 0.7892881123681666, "learning_rate": 2.166385383530123e-07, "loss": 0.5396, "step": 14266 }, { "epoch": 1.82, "grad_norm": 0.8799389816408874, "learning_rate": 2.1633827646989625e-07, "loss": 0.5965, "step": 14267 }, { "epoch": 1.82, "grad_norm": 0.7239671191295894, "learning_rate": 2.1603821821197414e-07, "loss": 0.5937, "step": 14268 }, { "epoch": 1.82, "grad_norm": 0.7232668241298167, "learning_rate": 2.157383635920174e-07, "loss": 0.4262, "step": 14269 }, { "epoch": 1.82, "grad_norm": 0.632478263834075, "learning_rate": 2.154387126227908e-07, "loss": 0.5449, "step": 14270 }, { "epoch": 1.82, "grad_norm": 0.7911763826193682, "learning_rate": 2.151392653170481e-07, "loss": 0.461, "step": 14271 }, { "epoch": 1.82, "grad_norm": 0.6079752627376944, "learning_rate": 2.1484002168753736e-07, "loss": 0.4708, "step": 14272 }, { "epoch": 1.82, "grad_norm": 0.6134363323507177, "learning_rate": 2.1454098174699457e-07, "loss": 0.4991, "step": 14273 }, { "epoch": 1.82, "grad_norm": 0.675072111730649, "learning_rate": 2.1424214550815115e-07, "loss": 0.4943, "step": 14274 }, { "epoch": 1.82, "grad_norm": 0.6325559726103882, "learning_rate": 2.1394351298372583e-07, "loss": 0.4722, "step": 14275 }, { "epoch": 1.82, "grad_norm": 0.7409668926094698, "learning_rate": 2.1364508418643126e-07, "loss": 0.5048, "step": 14276 }, { "epoch": 1.82, "grad_norm": 0.7620679256630503, "learning_rate": 2.1334685912897112e-07, "loss": 0.4918, "step": 14277 }, { "epoch": 1.82, "grad_norm": 0.6211636332154509, "learning_rate": 2.1304883782403863e-07, "loss": 0.4927, "step": 14278 }, { "epoch": 1.82, "grad_norm": 0.6686520440456334, "learning_rate": 2.127510202843197e-07, "loss": 0.5112, "step": 14279 }, { "epoch": 1.82, "grad_norm": 0.6508980306226618, "learning_rate": 2.1245340652249314e-07, "loss": 0.4531, "step": 14280 }, { "epoch": 1.82, "grad_norm": 0.7125360401461182, "learning_rate": 2.1215599655122543e-07, "loss": 0.4952, "step": 14281 }, { "epoch": 1.82, "grad_norm": 0.7291100781441111, "learning_rate": 2.1185879038317759e-07, "loss": 0.5051, "step": 14282 }, { "epoch": 1.82, "grad_norm": 0.7242552591639636, "learning_rate": 2.1156178803100003e-07, "loss": 0.4975, "step": 14283 }, { "epoch": 1.82, "grad_norm": 0.7608049634190546, "learning_rate": 2.11264989507336e-07, "loss": 0.5049, "step": 14284 }, { "epoch": 1.82, "grad_norm": 0.6241466821737679, "learning_rate": 2.1096839482481812e-07, "loss": 0.5139, "step": 14285 }, { "epoch": 1.82, "grad_norm": 0.5823282158587701, "learning_rate": 2.10672003996073e-07, "loss": 0.4774, "step": 14286 }, { "epoch": 1.82, "grad_norm": 0.63370167903247, "learning_rate": 2.1037581703371557e-07, "loss": 0.4273, "step": 14287 }, { "epoch": 1.82, "grad_norm": 0.5793874884157907, "learning_rate": 2.1007983395035404e-07, "loss": 0.4119, "step": 14288 }, { "epoch": 1.82, "grad_norm": 0.6101172829431193, "learning_rate": 2.0978405475858776e-07, "loss": 0.4249, "step": 14289 }, { "epoch": 1.82, "grad_norm": 0.6477869064329179, "learning_rate": 2.0948847947100725e-07, "loss": 0.5052, "step": 14290 }, { "epoch": 1.82, "grad_norm": 0.816156172884686, "learning_rate": 2.0919310810019244e-07, "loss": 0.4529, "step": 14291 }, { "epoch": 1.82, "grad_norm": 0.5420124308876991, "learning_rate": 2.0889794065871938e-07, "loss": 0.4196, "step": 14292 }, { "epoch": 1.82, "grad_norm": 0.601675052657815, "learning_rate": 2.0860297715915024e-07, "loss": 0.4807, "step": 14293 }, { "epoch": 1.82, "grad_norm": 0.8084569443320689, "learning_rate": 2.0830821761404164e-07, "loss": 0.5167, "step": 14294 }, { "epoch": 1.82, "grad_norm": 0.6739866006603342, "learning_rate": 2.080136620359391e-07, "loss": 0.4798, "step": 14295 }, { "epoch": 1.82, "grad_norm": 0.6984584972235989, "learning_rate": 2.0771931043738314e-07, "loss": 0.5492, "step": 14296 }, { "epoch": 1.82, "grad_norm": 0.8506793517627377, "learning_rate": 2.0742516283090152e-07, "loss": 0.5241, "step": 14297 }, { "epoch": 1.82, "grad_norm": 0.6786402209482955, "learning_rate": 2.0713121922901646e-07, "loss": 0.4857, "step": 14298 }, { "epoch": 1.82, "grad_norm": 0.6892568028378926, "learning_rate": 2.0683747964423905e-07, "loss": 0.5102, "step": 14299 }, { "epoch": 1.82, "grad_norm": 0.7141884834468498, "learning_rate": 2.0654394408907375e-07, "loss": 0.45, "step": 14300 }, { "epoch": 1.82, "grad_norm": 0.5683286211706149, "learning_rate": 2.0625061257601497e-07, "loss": 0.4823, "step": 14301 }, { "epoch": 1.82, "grad_norm": 0.5477259721247916, "learning_rate": 2.0595748511754888e-07, "loss": 0.403, "step": 14302 }, { "epoch": 1.82, "grad_norm": 0.6021732818958573, "learning_rate": 2.0566456172615267e-07, "loss": 0.4143, "step": 14303 }, { "epoch": 1.82, "grad_norm": 0.6402655717522208, "learning_rate": 2.0537184241429586e-07, "loss": 0.447, "step": 14304 }, { "epoch": 1.82, "grad_norm": 0.8375820976538834, "learning_rate": 2.050793271944379e-07, "loss": 0.5751, "step": 14305 }, { "epoch": 1.82, "grad_norm": 0.6681169397157045, "learning_rate": 2.047870160790316e-07, "loss": 0.442, "step": 14306 }, { "epoch": 1.82, "grad_norm": 0.7216134056520734, "learning_rate": 2.0449490908051818e-07, "loss": 0.5582, "step": 14307 }, { "epoch": 1.82, "grad_norm": 0.7329176885416686, "learning_rate": 2.0420300621133203e-07, "loss": 0.4857, "step": 14308 }, { "epoch": 1.82, "grad_norm": 0.613352323621301, "learning_rate": 2.0391130748389887e-07, "loss": 0.4442, "step": 14309 }, { "epoch": 1.82, "grad_norm": 0.7346971562996681, "learning_rate": 2.0361981291063594e-07, "loss": 0.5018, "step": 14310 }, { "epoch": 1.82, "grad_norm": 0.7302737683475773, "learning_rate": 2.0332852250394942e-07, "loss": 0.5262, "step": 14311 }, { "epoch": 1.82, "grad_norm": 0.7716855993844257, "learning_rate": 2.030374362762405e-07, "loss": 0.5507, "step": 14312 }, { "epoch": 1.82, "grad_norm": 0.7196100962910339, "learning_rate": 2.0274655423989874e-07, "loss": 0.4564, "step": 14313 }, { "epoch": 1.82, "grad_norm": 0.7741450336491692, "learning_rate": 2.0245587640730646e-07, "loss": 0.5152, "step": 14314 }, { "epoch": 1.82, "grad_norm": 0.8438016494082131, "learning_rate": 2.021654027908365e-07, "loss": 0.5584, "step": 14315 }, { "epoch": 1.82, "grad_norm": 0.8099193103162748, "learning_rate": 2.0187513340285348e-07, "loss": 0.467, "step": 14316 }, { "epoch": 1.82, "grad_norm": 0.8942045995477338, "learning_rate": 2.0158506825571355e-07, "loss": 0.5332, "step": 14317 }, { "epoch": 1.82, "grad_norm": 0.7364572933271637, "learning_rate": 2.012952073617641e-07, "loss": 0.4968, "step": 14318 }, { "epoch": 1.82, "grad_norm": 0.7423099212335124, "learning_rate": 2.0100555073334305e-07, "loss": 0.4516, "step": 14319 }, { "epoch": 1.82, "grad_norm": 0.7535601754679698, "learning_rate": 2.007160983827805e-07, "loss": 0.4886, "step": 14320 }, { "epoch": 1.82, "grad_norm": 0.5766015231935084, "learning_rate": 2.004268503223966e-07, "loss": 0.4642, "step": 14321 }, { "epoch": 1.82, "grad_norm": 0.6593194930832571, "learning_rate": 2.0013780656450542e-07, "loss": 0.4716, "step": 14322 }, { "epoch": 1.82, "grad_norm": 0.7545836409127022, "learning_rate": 1.9984896712140877e-07, "loss": 0.5153, "step": 14323 }, { "epoch": 1.82, "grad_norm": 0.758240537244074, "learning_rate": 1.9956033200540347e-07, "loss": 0.4917, "step": 14324 }, { "epoch": 1.82, "grad_norm": 0.5935221226492338, "learning_rate": 1.992719012287736e-07, "loss": 0.4538, "step": 14325 }, { "epoch": 1.83, "grad_norm": 0.670530445719581, "learning_rate": 1.989836748037993e-07, "loss": 0.4381, "step": 14326 }, { "epoch": 1.83, "grad_norm": 0.6528297627962395, "learning_rate": 1.9869565274274694e-07, "loss": 0.5037, "step": 14327 }, { "epoch": 1.83, "grad_norm": 0.7006928900554417, "learning_rate": 1.984078350578783e-07, "loss": 0.507, "step": 14328 }, { "epoch": 1.83, "grad_norm": 0.7531962641993079, "learning_rate": 1.9812022176144475e-07, "loss": 0.5474, "step": 14329 }, { "epoch": 1.83, "grad_norm": 0.7138910920752443, "learning_rate": 1.978328128656881e-07, "loss": 0.4831, "step": 14330 }, { "epoch": 1.83, "grad_norm": 0.6428767985627193, "learning_rate": 1.975456083828442e-07, "loss": 0.444, "step": 14331 }, { "epoch": 1.83, "grad_norm": 0.8189333038907648, "learning_rate": 1.97258608325136e-07, "loss": 0.48, "step": 14332 }, { "epoch": 1.83, "grad_norm": 0.6982947939586598, "learning_rate": 1.969718127047826e-07, "loss": 0.484, "step": 14333 }, { "epoch": 1.83, "grad_norm": 0.8473657394318022, "learning_rate": 1.9668522153398984e-07, "loss": 0.5503, "step": 14334 }, { "epoch": 1.83, "grad_norm": 0.7459971246875465, "learning_rate": 1.9639883482495902e-07, "loss": 0.4743, "step": 14335 }, { "epoch": 1.83, "grad_norm": 0.5551321965477087, "learning_rate": 1.9611265258987876e-07, "loss": 0.4783, "step": 14336 }, { "epoch": 1.83, "grad_norm": 0.9620237164058777, "learning_rate": 1.9582667484093154e-07, "loss": 0.5783, "step": 14337 }, { "epoch": 1.83, "grad_norm": 0.7395336547558562, "learning_rate": 1.9554090159029205e-07, "loss": 0.5269, "step": 14338 }, { "epoch": 1.83, "grad_norm": 0.7679456353723602, "learning_rate": 1.9525533285012276e-07, "loss": 0.5761, "step": 14339 }, { "epoch": 1.83, "grad_norm": 0.7703394434378684, "learning_rate": 1.9496996863258066e-07, "loss": 0.5191, "step": 14340 }, { "epoch": 1.83, "grad_norm": 0.6070851924278114, "learning_rate": 1.9468480894981213e-07, "loss": 0.4398, "step": 14341 }, { "epoch": 1.83, "grad_norm": 0.582515027629796, "learning_rate": 1.9439985381395522e-07, "loss": 0.4016, "step": 14342 }, { "epoch": 1.83, "grad_norm": 0.7197967779705888, "learning_rate": 1.941151032371408e-07, "loss": 0.4623, "step": 14343 }, { "epoch": 1.83, "grad_norm": 0.6653937841667994, "learning_rate": 1.9383055723148803e-07, "loss": 0.488, "step": 14344 }, { "epoch": 1.83, "grad_norm": 0.8973540762256952, "learning_rate": 1.9354621580911114e-07, "loss": 0.4778, "step": 14345 }, { "epoch": 1.83, "grad_norm": 0.7081241069540997, "learning_rate": 1.9326207898211214e-07, "loss": 0.4847, "step": 14346 }, { "epoch": 1.83, "grad_norm": 0.7225091522059277, "learning_rate": 1.9297814676258686e-07, "loss": 0.5295, "step": 14347 }, { "epoch": 1.83, "grad_norm": 0.7794597717452841, "learning_rate": 1.926944191626201e-07, "loss": 0.5296, "step": 14348 }, { "epoch": 1.83, "grad_norm": 0.6719598152262739, "learning_rate": 1.9241089619429055e-07, "loss": 0.4763, "step": 14349 }, { "epoch": 1.83, "grad_norm": 0.7196165719288189, "learning_rate": 1.9212757786966573e-07, "loss": 0.4796, "step": 14350 }, { "epoch": 1.83, "grad_norm": 0.7187516568420024, "learning_rate": 1.918444642008066e-07, "loss": 0.4837, "step": 14351 }, { "epoch": 1.83, "grad_norm": 0.6771849079518547, "learning_rate": 1.915615551997635e-07, "loss": 0.4463, "step": 14352 }, { "epoch": 1.83, "grad_norm": 0.5668015492529176, "learning_rate": 1.91278850878579e-07, "loss": 0.3988, "step": 14353 }, { "epoch": 1.83, "grad_norm": 0.5681509805189838, "learning_rate": 1.9099635124928794e-07, "loss": 0.447, "step": 14354 }, { "epoch": 1.83, "grad_norm": 0.5465209768259386, "learning_rate": 1.9071405632391515e-07, "loss": 0.3977, "step": 14355 }, { "epoch": 1.83, "grad_norm": 0.6984040157784853, "learning_rate": 1.9043196611447656e-07, "loss": 0.5853, "step": 14356 }, { "epoch": 1.83, "grad_norm": 0.698471989453948, "learning_rate": 1.901500806329798e-07, "loss": 0.494, "step": 14357 }, { "epoch": 1.83, "grad_norm": 0.652390790679496, "learning_rate": 1.8986839989142413e-07, "loss": 0.4402, "step": 14358 }, { "epoch": 1.83, "grad_norm": 0.6144190054342878, "learning_rate": 1.895869239018e-07, "loss": 0.461, "step": 14359 }, { "epoch": 1.83, "grad_norm": 0.8401425728747953, "learning_rate": 1.893056526760878e-07, "loss": 0.4345, "step": 14360 }, { "epoch": 1.83, "grad_norm": 0.7456357420685215, "learning_rate": 1.890245862262624e-07, "loss": 0.5073, "step": 14361 }, { "epoch": 1.83, "grad_norm": 0.7894870875404851, "learning_rate": 1.8874372456428535e-07, "loss": 0.5202, "step": 14362 }, { "epoch": 1.83, "grad_norm": 0.5812635470084229, "learning_rate": 1.884630677021143e-07, "loss": 0.4794, "step": 14363 }, { "epoch": 1.83, "grad_norm": 0.6818572588045819, "learning_rate": 1.881826156516947e-07, "loss": 0.4809, "step": 14364 }, { "epoch": 1.83, "grad_norm": 0.6846420028065585, "learning_rate": 1.8790236842496535e-07, "loss": 0.5197, "step": 14365 }, { "epoch": 1.83, "grad_norm": 0.7188384169208885, "learning_rate": 1.8762232603385334e-07, "loss": 0.4575, "step": 14366 }, { "epoch": 1.83, "grad_norm": 0.6895766602006786, "learning_rate": 1.8734248849028192e-07, "loss": 0.4379, "step": 14367 }, { "epoch": 1.83, "grad_norm": 0.5570746938089288, "learning_rate": 1.8706285580616156e-07, "loss": 0.4449, "step": 14368 }, { "epoch": 1.83, "grad_norm": 0.5863194296148131, "learning_rate": 1.8678342799339555e-07, "loss": 0.4554, "step": 14369 }, { "epoch": 1.83, "grad_norm": 0.9270250193827118, "learning_rate": 1.8650420506387768e-07, "loss": 0.5244, "step": 14370 }, { "epoch": 1.83, "grad_norm": 0.575681692513114, "learning_rate": 1.862251870294951e-07, "loss": 0.4773, "step": 14371 }, { "epoch": 1.83, "grad_norm": 0.7309770903063105, "learning_rate": 1.8594637390212277e-07, "loss": 0.5412, "step": 14372 }, { "epoch": 1.83, "grad_norm": 0.8733885564109211, "learning_rate": 1.856677656936301e-07, "loss": 0.5063, "step": 14373 }, { "epoch": 1.83, "grad_norm": 1.1476141230739636, "learning_rate": 1.853893624158759e-07, "loss": 0.5112, "step": 14374 }, { "epoch": 1.83, "grad_norm": 0.6482740424283063, "learning_rate": 1.851111640807124e-07, "loss": 0.4561, "step": 14375 }, { "epoch": 1.83, "grad_norm": 0.7533072183627249, "learning_rate": 1.8483317069997898e-07, "loss": 0.5122, "step": 14376 }, { "epoch": 1.83, "grad_norm": 0.7116202359040799, "learning_rate": 1.8455538228551173e-07, "loss": 0.4947, "step": 14377 }, { "epoch": 1.83, "grad_norm": 0.6201984975811512, "learning_rate": 1.8427779884913233e-07, "loss": 0.5042, "step": 14378 }, { "epoch": 1.83, "grad_norm": 0.6418454843286304, "learning_rate": 1.8400042040265908e-07, "loss": 0.4644, "step": 14379 }, { "epoch": 1.83, "grad_norm": 0.7127074780627742, "learning_rate": 1.8372324695789813e-07, "loss": 0.4327, "step": 14380 }, { "epoch": 1.83, "grad_norm": 0.7324688951040459, "learning_rate": 1.834462785266483e-07, "loss": 0.5142, "step": 14381 }, { "epoch": 1.83, "grad_norm": 0.6210287280683057, "learning_rate": 1.8316951512069857e-07, "loss": 0.4424, "step": 14382 }, { "epoch": 1.83, "grad_norm": 0.5512871398070924, "learning_rate": 1.8289295675183006e-07, "loss": 0.4364, "step": 14383 }, { "epoch": 1.83, "grad_norm": 0.5738217896905784, "learning_rate": 1.8261660343181498e-07, "loss": 0.4774, "step": 14384 }, { "epoch": 1.83, "grad_norm": 0.6668986602429339, "learning_rate": 1.8234045517241783e-07, "loss": 0.4979, "step": 14385 }, { "epoch": 1.83, "grad_norm": 0.6342103484435309, "learning_rate": 1.8206451198539145e-07, "loss": 0.4442, "step": 14386 }, { "epoch": 1.83, "grad_norm": 0.5696002777893736, "learning_rate": 1.8178877388248362e-07, "loss": 0.4498, "step": 14387 }, { "epoch": 1.83, "grad_norm": 0.7310552534144165, "learning_rate": 1.8151324087543054e-07, "loss": 0.5068, "step": 14388 }, { "epoch": 1.83, "grad_norm": 0.712942743033302, "learning_rate": 1.8123791297596115e-07, "loss": 0.5017, "step": 14389 }, { "epoch": 1.83, "grad_norm": 0.7337625363181419, "learning_rate": 1.8096279019579498e-07, "loss": 0.5338, "step": 14390 }, { "epoch": 1.83, "grad_norm": 0.7834799209686976, "learning_rate": 1.8068787254664377e-07, "loss": 0.5198, "step": 14391 }, { "epoch": 1.83, "grad_norm": 0.8138107236415711, "learning_rate": 1.8041316004020925e-07, "loss": 0.4686, "step": 14392 }, { "epoch": 1.83, "grad_norm": 0.5985307211361228, "learning_rate": 1.8013865268818542e-07, "loss": 0.4143, "step": 14393 }, { "epoch": 1.83, "grad_norm": 0.5888372621653106, "learning_rate": 1.7986435050225737e-07, "loss": 0.4181, "step": 14394 }, { "epoch": 1.83, "grad_norm": 0.6856870981105201, "learning_rate": 1.795902534941002e-07, "loss": 0.4063, "step": 14395 }, { "epoch": 1.83, "grad_norm": 0.6615950717506082, "learning_rate": 1.7931636167538291e-07, "loss": 0.529, "step": 14396 }, { "epoch": 1.83, "grad_norm": 0.782879911872576, "learning_rate": 1.7904267505776284e-07, "loss": 0.5169, "step": 14397 }, { "epoch": 1.83, "grad_norm": 0.8333017896165456, "learning_rate": 1.7876919365289124e-07, "loss": 0.5528, "step": 14398 }, { "epoch": 1.83, "grad_norm": 0.6761720590613811, "learning_rate": 1.784959174724077e-07, "loss": 0.5111, "step": 14399 }, { "epoch": 1.83, "grad_norm": 0.7031546459476917, "learning_rate": 1.782228465279462e-07, "loss": 0.4742, "step": 14400 }, { "epoch": 1.83, "grad_norm": 0.6545302445866777, "learning_rate": 1.7794998083113023e-07, "loss": 0.4753, "step": 14401 }, { "epoch": 1.83, "grad_norm": 0.7408188720628436, "learning_rate": 1.7767732039357334e-07, "loss": 0.5305, "step": 14402 }, { "epoch": 1.83, "grad_norm": 0.742995406342412, "learning_rate": 1.7740486522688395e-07, "loss": 0.5256, "step": 14403 }, { "epoch": 1.84, "grad_norm": 0.6537276549558895, "learning_rate": 1.7713261534265835e-07, "loss": 0.5164, "step": 14404 }, { "epoch": 1.84, "grad_norm": 0.7040813962511778, "learning_rate": 1.7686057075248563e-07, "loss": 0.5236, "step": 14405 }, { "epoch": 1.84, "grad_norm": 0.7464531024224788, "learning_rate": 1.7658873146794597e-07, "loss": 0.5841, "step": 14406 }, { "epoch": 1.84, "grad_norm": 0.8376540112810112, "learning_rate": 1.7631709750061064e-07, "loss": 0.4551, "step": 14407 }, { "epoch": 1.84, "grad_norm": 0.6586639478832588, "learning_rate": 1.760456688620421e-07, "loss": 0.4042, "step": 14408 }, { "epoch": 1.84, "grad_norm": 0.7543435348258961, "learning_rate": 1.7577444556379442e-07, "loss": 0.5006, "step": 14409 }, { "epoch": 1.84, "grad_norm": 1.358788278721973, "learning_rate": 1.755034276174128e-07, "loss": 0.5745, "step": 14410 }, { "epoch": 1.84, "grad_norm": 0.7837953215144908, "learning_rate": 1.7523261503443244e-07, "loss": 0.5211, "step": 14411 }, { "epoch": 1.84, "grad_norm": 0.6097115677521421, "learning_rate": 1.7496200782638305e-07, "loss": 0.4686, "step": 14412 }, { "epoch": 1.84, "grad_norm": 0.7191320983956374, "learning_rate": 1.7469160600478151e-07, "loss": 0.5759, "step": 14413 }, { "epoch": 1.84, "grad_norm": 0.9161904464830615, "learning_rate": 1.7442140958113917e-07, "loss": 0.5435, "step": 14414 }, { "epoch": 1.84, "grad_norm": 0.6590328988866304, "learning_rate": 1.7415141856695628e-07, "loss": 0.5001, "step": 14415 }, { "epoch": 1.84, "grad_norm": 0.7542339176511058, "learning_rate": 1.7388163297372751e-07, "loss": 0.5153, "step": 14416 }, { "epoch": 1.84, "grad_norm": 0.5953506153170375, "learning_rate": 1.736120528129348e-07, "loss": 0.4413, "step": 14417 }, { "epoch": 1.84, "grad_norm": 0.674893171582603, "learning_rate": 1.7334267809605453e-07, "loss": 0.4079, "step": 14418 }, { "epoch": 1.84, "grad_norm": 0.579293993411141, "learning_rate": 1.7307350883455255e-07, "loss": 0.4506, "step": 14419 }, { "epoch": 1.84, "grad_norm": 0.5996596309423741, "learning_rate": 1.7280454503988687e-07, "loss": 0.4335, "step": 14420 }, { "epoch": 1.84, "grad_norm": 0.7210121145633174, "learning_rate": 1.7253578672350557e-07, "loss": 0.4189, "step": 14421 }, { "epoch": 1.84, "grad_norm": 0.6849425903166557, "learning_rate": 1.7226723389685007e-07, "loss": 0.425, "step": 14422 }, { "epoch": 1.84, "grad_norm": 0.5438435645429236, "learning_rate": 1.7199888657135066e-07, "loss": 0.4529, "step": 14423 }, { "epoch": 1.84, "grad_norm": 0.6064631159478787, "learning_rate": 1.71730744758431e-07, "loss": 0.4893, "step": 14424 }, { "epoch": 1.84, "grad_norm": 0.7396567303289038, "learning_rate": 1.7146280846950414e-07, "loss": 0.5121, "step": 14425 }, { "epoch": 1.84, "grad_norm": 0.8237535379009461, "learning_rate": 1.7119507771597655e-07, "loss": 0.5581, "step": 14426 }, { "epoch": 1.84, "grad_norm": 0.7115855373623007, "learning_rate": 1.7092755250924298e-07, "loss": 0.4664, "step": 14427 }, { "epoch": 1.84, "grad_norm": 0.5795528804130229, "learning_rate": 1.706602328606921e-07, "loss": 0.4285, "step": 14428 }, { "epoch": 1.84, "grad_norm": 0.6052197329005965, "learning_rate": 1.703931187817026e-07, "loss": 0.4793, "step": 14429 }, { "epoch": 1.84, "grad_norm": 0.7428254569809347, "learning_rate": 1.701262102836454e-07, "loss": 0.557, "step": 14430 }, { "epoch": 1.84, "grad_norm": 0.7218578907737273, "learning_rate": 1.6985950737788138e-07, "loss": 0.4933, "step": 14431 }, { "epoch": 1.84, "grad_norm": 0.6427328356417013, "learning_rate": 1.695930100757631e-07, "loss": 0.4895, "step": 14432 }, { "epoch": 1.84, "grad_norm": 0.7630178511730205, "learning_rate": 1.6932671838863436e-07, "loss": 0.5493, "step": 14433 }, { "epoch": 1.84, "grad_norm": 0.8267488561104419, "learning_rate": 1.69060632327831e-07, "loss": 0.5469, "step": 14434 }, { "epoch": 1.84, "grad_norm": 0.7570834705732968, "learning_rate": 1.6879475190467843e-07, "loss": 0.469, "step": 14435 }, { "epoch": 1.84, "grad_norm": 0.7482439152706846, "learning_rate": 1.6852907713049538e-07, "loss": 0.4858, "step": 14436 }, { "epoch": 1.84, "grad_norm": 0.6321480355311871, "learning_rate": 1.6826360801659002e-07, "loss": 0.4661, "step": 14437 }, { "epoch": 1.84, "grad_norm": 0.7229171698495481, "learning_rate": 1.6799834457426335e-07, "loss": 0.5012, "step": 14438 }, { "epoch": 1.84, "grad_norm": 0.7997840544590287, "learning_rate": 1.6773328681480572e-07, "loss": 0.5374, "step": 14439 }, { "epoch": 1.84, "grad_norm": 0.758735868808997, "learning_rate": 1.6746843474950093e-07, "loss": 0.4943, "step": 14440 }, { "epoch": 1.84, "grad_norm": 0.6008581114693998, "learning_rate": 1.672037883896216e-07, "loss": 0.4053, "step": 14441 }, { "epoch": 1.84, "grad_norm": 0.675822466868467, "learning_rate": 1.6693934774643428e-07, "loss": 0.5283, "step": 14442 }, { "epoch": 1.84, "grad_norm": 0.8867625261208633, "learning_rate": 1.6667511283119387e-07, "loss": 0.4966, "step": 14443 }, { "epoch": 1.84, "grad_norm": 0.6217300275017084, "learning_rate": 1.6641108365514968e-07, "loss": 0.4338, "step": 14444 }, { "epoch": 1.84, "grad_norm": 0.6252661184372692, "learning_rate": 1.6614726022953942e-07, "loss": 0.4714, "step": 14445 }, { "epoch": 1.84, "grad_norm": 0.782728641773169, "learning_rate": 1.6588364256559352e-07, "loss": 0.5237, "step": 14446 }, { "epoch": 1.84, "grad_norm": 0.6298078947291111, "learning_rate": 1.6562023067453303e-07, "loss": 0.4724, "step": 14447 }, { "epoch": 1.84, "grad_norm": 0.7690659788639767, "learning_rate": 1.6535702456757173e-07, "loss": 0.5029, "step": 14448 }, { "epoch": 1.84, "grad_norm": 0.5799968605045472, "learning_rate": 1.650940242559118e-07, "loss": 0.3983, "step": 14449 }, { "epoch": 1.84, "grad_norm": 0.6379699414208069, "learning_rate": 1.6483122975074927e-07, "loss": 0.4138, "step": 14450 }, { "epoch": 1.84, "grad_norm": 0.5947529078980037, "learning_rate": 1.645686410632702e-07, "loss": 0.4227, "step": 14451 }, { "epoch": 1.84, "grad_norm": 0.689697636348153, "learning_rate": 1.643062582046523e-07, "loss": 0.4919, "step": 14452 }, { "epoch": 1.84, "grad_norm": 0.6743307007995761, "learning_rate": 1.640440811860644e-07, "loss": 0.5007, "step": 14453 }, { "epoch": 1.84, "grad_norm": 0.9280366651488524, "learning_rate": 1.637821100186665e-07, "loss": 0.5572, "step": 14454 }, { "epoch": 1.84, "grad_norm": 0.778888620894816, "learning_rate": 1.6352034471361022e-07, "loss": 0.5236, "step": 14455 }, { "epoch": 1.84, "grad_norm": 0.6238038812525073, "learning_rate": 1.6325878528203665e-07, "loss": 0.4747, "step": 14456 }, { "epoch": 1.84, "grad_norm": 0.7367138172581367, "learning_rate": 1.629974317350813e-07, "loss": 0.4029, "step": 14457 }, { "epoch": 1.84, "grad_norm": 0.655659234061076, "learning_rate": 1.627362840838681e-07, "loss": 0.4861, "step": 14458 }, { "epoch": 1.84, "grad_norm": 0.6992592670315241, "learning_rate": 1.6247534233951423e-07, "loss": 0.4259, "step": 14459 }, { "epoch": 1.84, "grad_norm": 0.6094638887959968, "learning_rate": 1.622146065131258e-07, "loss": 0.4558, "step": 14460 }, { "epoch": 1.84, "grad_norm": 0.7047238393867782, "learning_rate": 1.619540766158023e-07, "loss": 0.4795, "step": 14461 }, { "epoch": 1.84, "grad_norm": 0.6920371787408343, "learning_rate": 1.6169375265863428e-07, "loss": 0.4882, "step": 14462 }, { "epoch": 1.84, "grad_norm": 0.7859910436065012, "learning_rate": 1.6143363465270123e-07, "loss": 0.5083, "step": 14463 }, { "epoch": 1.84, "grad_norm": 0.7130628580225253, "learning_rate": 1.611737226090776e-07, "loss": 0.5263, "step": 14464 }, { "epoch": 1.84, "grad_norm": 0.7434167665525937, "learning_rate": 1.609140165388251e-07, "loss": 0.532, "step": 14465 }, { "epoch": 1.84, "grad_norm": 0.6604126000885024, "learning_rate": 1.6065451645299933e-07, "loss": 0.4347, "step": 14466 }, { "epoch": 1.84, "grad_norm": 0.6165375822458161, "learning_rate": 1.6039522236264703e-07, "loss": 0.4076, "step": 14467 }, { "epoch": 1.84, "grad_norm": 0.662021026832994, "learning_rate": 1.6013613427880437e-07, "loss": 0.4774, "step": 14468 }, { "epoch": 1.84, "grad_norm": 0.6691638724141376, "learning_rate": 1.5987725221250083e-07, "loss": 0.4324, "step": 14469 }, { "epoch": 1.84, "grad_norm": 0.583175013690987, "learning_rate": 1.5961857617475597e-07, "loss": 0.4289, "step": 14470 }, { "epoch": 1.84, "grad_norm": 0.6947106417728373, "learning_rate": 1.5936010617658038e-07, "loss": 0.4896, "step": 14471 }, { "epoch": 1.84, "grad_norm": 0.7314112170167733, "learning_rate": 1.591018422289764e-07, "loss": 0.4931, "step": 14472 }, { "epoch": 1.84, "grad_norm": 0.6350660599853312, "learning_rate": 1.5884378434293857e-07, "loss": 0.438, "step": 14473 }, { "epoch": 1.84, "grad_norm": 0.6586084576377559, "learning_rate": 1.5858593252944976e-07, "loss": 0.5051, "step": 14474 }, { "epoch": 1.84, "grad_norm": 0.704469032913964, "learning_rate": 1.5832828679948787e-07, "loss": 0.5357, "step": 14475 }, { "epoch": 1.84, "grad_norm": 0.6282203766789904, "learning_rate": 1.58070847164018e-07, "loss": 0.4662, "step": 14476 }, { "epoch": 1.84, "grad_norm": 0.7016049527763785, "learning_rate": 1.5781361363399973e-07, "loss": 0.4889, "step": 14477 }, { "epoch": 1.84, "grad_norm": 0.9095254393783829, "learning_rate": 1.5755658622038261e-07, "loss": 0.5491, "step": 14478 }, { "epoch": 1.84, "grad_norm": 0.7014382541810558, "learning_rate": 1.572997649341079e-07, "loss": 0.4788, "step": 14479 }, { "epoch": 1.84, "grad_norm": 0.6673811059287057, "learning_rate": 1.5704314978610634e-07, "loss": 0.5153, "step": 14480 }, { "epoch": 1.84, "grad_norm": 0.6355383854832856, "learning_rate": 1.5678674078730304e-07, "loss": 0.4318, "step": 14481 }, { "epoch": 1.84, "grad_norm": 0.6043562035432801, "learning_rate": 1.5653053794861038e-07, "loss": 0.4273, "step": 14482 }, { "epoch": 1.85, "grad_norm": 0.5920650912916088, "learning_rate": 1.5627454128093633e-07, "loss": 0.4458, "step": 14483 }, { "epoch": 1.85, "grad_norm": 0.8127859498050771, "learning_rate": 1.5601875079517604e-07, "loss": 0.5404, "step": 14484 }, { "epoch": 1.85, "grad_norm": 0.6223423630057917, "learning_rate": 1.5576316650221858e-07, "loss": 0.4527, "step": 14485 }, { "epoch": 1.85, "grad_norm": 0.6197232992123837, "learning_rate": 1.55507788412943e-07, "loss": 0.4723, "step": 14486 }, { "epoch": 1.85, "grad_norm": 0.6757316842405608, "learning_rate": 1.552526165382201e-07, "loss": 0.4877, "step": 14487 }, { "epoch": 1.85, "grad_norm": 0.6756280004450077, "learning_rate": 1.549976508889117e-07, "loss": 0.5687, "step": 14488 }, { "epoch": 1.85, "grad_norm": 0.972827611458319, "learning_rate": 1.547428914758714e-07, "loss": 0.5251, "step": 14489 }, { "epoch": 1.85, "grad_norm": 0.6488402518598901, "learning_rate": 1.544883383099416e-07, "loss": 0.4061, "step": 14490 }, { "epoch": 1.85, "grad_norm": 0.5846427094925427, "learning_rate": 1.5423399140196028e-07, "loss": 0.4623, "step": 14491 }, { "epoch": 1.85, "grad_norm": 0.6188906800770388, "learning_rate": 1.5397985076275268e-07, "loss": 0.4491, "step": 14492 }, { "epoch": 1.85, "grad_norm": 0.8166495239368232, "learning_rate": 1.5372591640313794e-07, "loss": 0.5643, "step": 14493 }, { "epoch": 1.85, "grad_norm": 0.8035342285997887, "learning_rate": 1.5347218833392353e-07, "loss": 0.532, "step": 14494 }, { "epoch": 1.85, "grad_norm": 0.5803178888854694, "learning_rate": 1.5321866656591134e-07, "loss": 0.4299, "step": 14495 }, { "epoch": 1.85, "grad_norm": 1.1229990651455788, "learning_rate": 1.529653511098922e-07, "loss": 0.5132, "step": 14496 }, { "epoch": 1.85, "grad_norm": 0.6857997167177803, "learning_rate": 1.5271224197664969e-07, "loss": 0.4968, "step": 14497 }, { "epoch": 1.85, "grad_norm": 0.6212652243830155, "learning_rate": 1.5245933917695631e-07, "loss": 0.4564, "step": 14498 }, { "epoch": 1.85, "grad_norm": 0.7109407793656644, "learning_rate": 1.522066427215796e-07, "loss": 0.5032, "step": 14499 }, { "epoch": 1.85, "grad_norm": 0.7017321341876743, "learning_rate": 1.5195415262127366e-07, "loss": 0.5431, "step": 14500 }, { "epoch": 1.85, "grad_norm": 0.6398547750513115, "learning_rate": 1.5170186888678774e-07, "loss": 0.4649, "step": 14501 }, { "epoch": 1.85, "grad_norm": 0.7219712168121826, "learning_rate": 1.5144979152885985e-07, "loss": 0.4645, "step": 14502 }, { "epoch": 1.85, "grad_norm": 0.6525648576189292, "learning_rate": 1.5119792055822148e-07, "loss": 0.4145, "step": 14503 }, { "epoch": 1.85, "grad_norm": 0.596410459009014, "learning_rate": 1.5094625598559232e-07, "loss": 0.4269, "step": 14504 }, { "epoch": 1.85, "grad_norm": 0.667766828548346, "learning_rate": 1.5069479782168606e-07, "loss": 0.4744, "step": 14505 }, { "epoch": 1.85, "grad_norm": 0.6526222386796129, "learning_rate": 1.504435460772058e-07, "loss": 0.4322, "step": 14506 }, { "epoch": 1.85, "grad_norm": 0.5869795174879381, "learning_rate": 1.5019250076284741e-07, "loss": 0.4018, "step": 14507 }, { "epoch": 1.85, "grad_norm": 0.62824549481451, "learning_rate": 1.499416618892957e-07, "loss": 0.4533, "step": 14508 }, { "epoch": 1.85, "grad_norm": 0.6307235185778729, "learning_rate": 1.496910294672299e-07, "loss": 0.3945, "step": 14509 }, { "epoch": 1.85, "grad_norm": 0.5770803857584231, "learning_rate": 1.4944060350731647e-07, "loss": 0.4285, "step": 14510 }, { "epoch": 1.85, "grad_norm": 0.6526039911629956, "learning_rate": 1.4919038402021745e-07, "loss": 0.4761, "step": 14511 }, { "epoch": 1.85, "grad_norm": 0.5886203472342163, "learning_rate": 1.489403710165821e-07, "loss": 0.4598, "step": 14512 }, { "epoch": 1.85, "grad_norm": 0.5877837761888698, "learning_rate": 1.4869056450705356e-07, "loss": 0.422, "step": 14513 }, { "epoch": 1.85, "grad_norm": 0.7443476003933436, "learning_rate": 1.4844096450226497e-07, "loss": 0.5115, "step": 14514 }, { "epoch": 1.85, "grad_norm": 0.6520738132837404, "learning_rate": 1.4819157101284122e-07, "loss": 0.4967, "step": 14515 }, { "epoch": 1.85, "grad_norm": 0.8766459912710023, "learning_rate": 1.479423840493982e-07, "loss": 0.4923, "step": 14516 }, { "epoch": 1.85, "grad_norm": 0.7140035548679665, "learning_rate": 1.4769340362254304e-07, "loss": 0.5347, "step": 14517 }, { "epoch": 1.85, "grad_norm": 0.857810427424583, "learning_rate": 1.4744462974287387e-07, "loss": 0.5784, "step": 14518 }, { "epoch": 1.85, "grad_norm": 0.7158086818250785, "learning_rate": 1.4719606242098006e-07, "loss": 0.4984, "step": 14519 }, { "epoch": 1.85, "grad_norm": 0.5921914989365321, "learning_rate": 1.469477016674431e-07, "loss": 0.4356, "step": 14520 }, { "epoch": 1.85, "grad_norm": 0.5550821315392203, "learning_rate": 1.4669954749283345e-07, "loss": 0.4329, "step": 14521 }, { "epoch": 1.85, "grad_norm": 0.591229356921549, "learning_rate": 1.4645159990771597e-07, "loss": 0.442, "step": 14522 }, { "epoch": 1.85, "grad_norm": 0.8594987245639256, "learning_rate": 1.462038589226439e-07, "loss": 0.4894, "step": 14523 }, { "epoch": 1.85, "grad_norm": 0.7994847609341053, "learning_rate": 1.4595632454816266e-07, "loss": 0.5043, "step": 14524 }, { "epoch": 1.85, "grad_norm": 0.8018168242833844, "learning_rate": 1.4570899679481e-07, "loss": 0.4758, "step": 14525 }, { "epoch": 1.85, "grad_norm": 0.6457460871706924, "learning_rate": 1.4546187567311242e-07, "loss": 0.5387, "step": 14526 }, { "epoch": 1.85, "grad_norm": 0.7775532793024835, "learning_rate": 1.4521496119359048e-07, "loss": 0.4923, "step": 14527 }, { "epoch": 1.85, "grad_norm": 0.5896969529262038, "learning_rate": 1.44968253366754e-07, "loss": 0.4817, "step": 14528 }, { "epoch": 1.85, "grad_norm": 0.8525329723145951, "learning_rate": 1.4472175220310415e-07, "loss": 0.5172, "step": 14529 }, { "epoch": 1.85, "grad_norm": 0.6519812938902835, "learning_rate": 1.4447545771313466e-07, "loss": 0.4208, "step": 14530 }, { "epoch": 1.85, "grad_norm": 0.6784946130508249, "learning_rate": 1.4422936990732828e-07, "loss": 0.5197, "step": 14531 }, { "epoch": 1.85, "grad_norm": 0.7949929651395813, "learning_rate": 1.4398348879616165e-07, "loss": 0.5202, "step": 14532 }, { "epoch": 1.85, "grad_norm": 0.7169030844721569, "learning_rate": 1.4373781439009914e-07, "loss": 0.5042, "step": 14533 }, { "epoch": 1.85, "grad_norm": 0.702419579389354, "learning_rate": 1.4349234669960076e-07, "loss": 0.4837, "step": 14534 }, { "epoch": 1.85, "grad_norm": 0.7330248232972041, "learning_rate": 1.4324708573511314e-07, "loss": 0.4556, "step": 14535 }, { "epoch": 1.85, "grad_norm": 0.6498972802900028, "learning_rate": 1.4300203150707736e-07, "loss": 0.518, "step": 14536 }, { "epoch": 1.85, "grad_norm": 0.676660720340068, "learning_rate": 1.42757184025924e-07, "loss": 0.4242, "step": 14537 }, { "epoch": 1.85, "grad_norm": 0.7435679249306024, "learning_rate": 1.425125433020763e-07, "loss": 0.4652, "step": 14538 }, { "epoch": 1.85, "grad_norm": 0.7641399063718642, "learning_rate": 1.4226810934594658e-07, "loss": 0.4827, "step": 14539 }, { "epoch": 1.85, "grad_norm": 0.7239936715137886, "learning_rate": 1.4202388216794093e-07, "loss": 0.5392, "step": 14540 }, { "epoch": 1.85, "grad_norm": 0.7345741021398988, "learning_rate": 1.4177986177845437e-07, "loss": 0.5608, "step": 14541 }, { "epoch": 1.85, "grad_norm": 0.719044230759214, "learning_rate": 1.4153604818787526e-07, "loss": 0.5096, "step": 14542 }, { "epoch": 1.85, "grad_norm": 0.717675609566851, "learning_rate": 1.4129244140658027e-07, "loss": 0.4582, "step": 14543 }, { "epoch": 1.85, "grad_norm": 0.6885341246476963, "learning_rate": 1.4104904144494058e-07, "loss": 0.4464, "step": 14544 }, { "epoch": 1.85, "grad_norm": 0.5867385715439626, "learning_rate": 1.4080584831331566e-07, "loss": 0.4309, "step": 14545 }, { "epoch": 1.85, "grad_norm": 0.5945871451323491, "learning_rate": 1.4056286202205837e-07, "loss": 0.4984, "step": 14546 }, { "epoch": 1.85, "grad_norm": 0.7194028118012333, "learning_rate": 1.4032008258151153e-07, "loss": 0.4515, "step": 14547 }, { "epoch": 1.85, "grad_norm": 1.4302932998509084, "learning_rate": 1.400775100020091e-07, "loss": 0.5369, "step": 14548 }, { "epoch": 1.85, "grad_norm": 0.7475247982560195, "learning_rate": 1.3983514429387723e-07, "loss": 0.4846, "step": 14549 }, { "epoch": 1.85, "grad_norm": 0.6011457280385895, "learning_rate": 1.3959298546743272e-07, "loss": 0.4755, "step": 14550 }, { "epoch": 1.85, "grad_norm": 0.7837452846008919, "learning_rate": 1.393510335329823e-07, "loss": 0.5265, "step": 14551 }, { "epoch": 1.85, "grad_norm": 0.6057801509136566, "learning_rate": 1.3910928850082606e-07, "loss": 0.4712, "step": 14552 }, { "epoch": 1.85, "grad_norm": 0.5701181999762271, "learning_rate": 1.3886775038125465e-07, "loss": 0.4127, "step": 14553 }, { "epoch": 1.85, "grad_norm": 0.6123246913100189, "learning_rate": 1.3862641918454934e-07, "loss": 0.4187, "step": 14554 }, { "epoch": 1.85, "grad_norm": 0.566855188884701, "learning_rate": 1.383852949209824e-07, "loss": 0.3801, "step": 14555 }, { "epoch": 1.85, "grad_norm": 0.5608775318801511, "learning_rate": 1.381443776008179e-07, "loss": 0.4494, "step": 14556 }, { "epoch": 1.85, "grad_norm": 0.6587409240209053, "learning_rate": 1.379036672343109e-07, "loss": 0.4605, "step": 14557 }, { "epoch": 1.85, "grad_norm": 0.6474466195261291, "learning_rate": 1.376631638317083e-07, "loss": 0.4626, "step": 14558 }, { "epoch": 1.85, "grad_norm": 0.8392079079111774, "learning_rate": 1.3742286740324628e-07, "loss": 0.4825, "step": 14559 }, { "epoch": 1.85, "grad_norm": 0.5618829599446743, "learning_rate": 1.3718277795915502e-07, "loss": 0.4789, "step": 14560 }, { "epoch": 1.86, "grad_norm": 0.7011658156287061, "learning_rate": 1.36942895509653e-07, "loss": 0.4597, "step": 14561 }, { "epoch": 1.86, "grad_norm": 0.5860859935726452, "learning_rate": 1.3670322006495208e-07, "loss": 0.4454, "step": 14562 }, { "epoch": 1.86, "grad_norm": 0.6816245301979058, "learning_rate": 1.364637516352535e-07, "loss": 0.527, "step": 14563 }, { "epoch": 1.86, "grad_norm": 0.7804722749077926, "learning_rate": 1.362244902307519e-07, "loss": 0.5134, "step": 14564 }, { "epoch": 1.86, "grad_norm": 0.7982628561171938, "learning_rate": 1.3598543586163082e-07, "loss": 0.5892, "step": 14565 }, { "epoch": 1.86, "grad_norm": 0.6850894904284699, "learning_rate": 1.3574658853806767e-07, "loss": 0.5037, "step": 14566 }, { "epoch": 1.86, "grad_norm": 0.6291806507558616, "learning_rate": 1.3550794827022762e-07, "loss": 0.4634, "step": 14567 }, { "epoch": 1.86, "grad_norm": 0.6565873722242394, "learning_rate": 1.352695150682698e-07, "loss": 0.4925, "step": 14568 }, { "epoch": 1.86, "grad_norm": 0.7229310978348868, "learning_rate": 1.3503128894234273e-07, "loss": 0.4978, "step": 14569 }, { "epoch": 1.86, "grad_norm": 0.7584619876399255, "learning_rate": 1.3479326990258833e-07, "loss": 0.5501, "step": 14570 }, { "epoch": 1.86, "grad_norm": 0.8914743568964838, "learning_rate": 1.3455545795913682e-07, "loss": 0.5132, "step": 14571 }, { "epoch": 1.86, "grad_norm": 0.6733122593057456, "learning_rate": 1.3431785312211231e-07, "loss": 0.4512, "step": 14572 }, { "epoch": 1.86, "grad_norm": 0.743236306732788, "learning_rate": 1.340804554016273e-07, "loss": 0.4778, "step": 14573 }, { "epoch": 1.86, "grad_norm": 0.7656147085180478, "learning_rate": 1.338432648077892e-07, "loss": 0.5278, "step": 14574 }, { "epoch": 1.86, "grad_norm": 0.6399933399756362, "learning_rate": 1.336062813506922e-07, "loss": 0.469, "step": 14575 }, { "epoch": 1.86, "grad_norm": 0.6192135710951807, "learning_rate": 1.3336950504042545e-07, "loss": 0.4656, "step": 14576 }, { "epoch": 1.86, "grad_norm": 0.6039001556433152, "learning_rate": 1.3313293588706754e-07, "loss": 0.4957, "step": 14577 }, { "epoch": 1.86, "grad_norm": 0.7721168464699927, "learning_rate": 1.328965739006882e-07, "loss": 0.5114, "step": 14578 }, { "epoch": 1.86, "grad_norm": 0.5999136132235355, "learning_rate": 1.3266041909134886e-07, "loss": 0.4398, "step": 14579 }, { "epoch": 1.86, "grad_norm": 0.5924322948361385, "learning_rate": 1.3242447146910141e-07, "loss": 0.4754, "step": 14580 }, { "epoch": 1.86, "grad_norm": 0.6882058759849665, "learning_rate": 1.321887310439901e-07, "loss": 0.4784, "step": 14581 }, { "epoch": 1.86, "grad_norm": 0.8901735178805802, "learning_rate": 1.3195319782604854e-07, "loss": 0.5337, "step": 14582 }, { "epoch": 1.86, "grad_norm": 0.6418153344028918, "learning_rate": 1.3171787182530372e-07, "loss": 0.474, "step": 14583 }, { "epoch": 1.86, "grad_norm": 0.783898753970461, "learning_rate": 1.3148275305177205e-07, "loss": 0.4943, "step": 14584 }, { "epoch": 1.86, "grad_norm": 0.5881715779871449, "learning_rate": 1.3124784151546165e-07, "loss": 0.4588, "step": 14585 }, { "epoch": 1.86, "grad_norm": 0.6604192284523114, "learning_rate": 1.3101313722637342e-07, "loss": 0.4383, "step": 14586 }, { "epoch": 1.86, "grad_norm": 0.6337258283479443, "learning_rate": 1.3077864019449605e-07, "loss": 0.4316, "step": 14587 }, { "epoch": 1.86, "grad_norm": 0.5700042396604457, "learning_rate": 1.305443504298126e-07, "loss": 0.4344, "step": 14588 }, { "epoch": 1.86, "grad_norm": 0.6786612261673106, "learning_rate": 1.3031026794229517e-07, "loss": 0.4606, "step": 14589 }, { "epoch": 1.86, "grad_norm": 0.5736506269511835, "learning_rate": 1.3007639274190853e-07, "loss": 0.4579, "step": 14590 }, { "epoch": 1.86, "grad_norm": 0.7300564335563268, "learning_rate": 1.2984272483860805e-07, "loss": 0.5077, "step": 14591 }, { "epoch": 1.86, "grad_norm": 1.1174411821280756, "learning_rate": 1.2960926424233966e-07, "loss": 0.518, "step": 14592 }, { "epoch": 1.86, "grad_norm": 0.8871756611870556, "learning_rate": 1.2937601096304154e-07, "loss": 0.5344, "step": 14593 }, { "epoch": 1.86, "grad_norm": 1.2122654838973022, "learning_rate": 1.2914296501064182e-07, "loss": 0.5155, "step": 14594 }, { "epoch": 1.86, "grad_norm": 0.7778307725094109, "learning_rate": 1.2891012639506206e-07, "loss": 0.5356, "step": 14595 }, { "epoch": 1.86, "grad_norm": 0.7482868953021833, "learning_rate": 1.2867749512621154e-07, "loss": 0.5196, "step": 14596 }, { "epoch": 1.86, "grad_norm": 0.6278876386525926, "learning_rate": 1.2844507121399396e-07, "loss": 0.4271, "step": 14597 }, { "epoch": 1.86, "grad_norm": 0.6086691509186989, "learning_rate": 1.2821285466830202e-07, "loss": 0.5018, "step": 14598 }, { "epoch": 1.86, "grad_norm": 0.8196907956198314, "learning_rate": 1.2798084549902169e-07, "loss": 0.523, "step": 14599 }, { "epoch": 1.86, "grad_norm": 0.5589035734953659, "learning_rate": 1.2774904371602724e-07, "loss": 0.4046, "step": 14600 }, { "epoch": 1.86, "grad_norm": 0.5769065683573698, "learning_rate": 1.2751744932918642e-07, "loss": 0.3911, "step": 14601 }, { "epoch": 1.86, "grad_norm": 0.6811009711376147, "learning_rate": 1.2728606234835795e-07, "loss": 0.4544, "step": 14602 }, { "epoch": 1.86, "grad_norm": 0.7146595682282406, "learning_rate": 1.2705488278339117e-07, "loss": 0.5306, "step": 14603 }, { "epoch": 1.86, "grad_norm": 0.7000902270712882, "learning_rate": 1.2682391064412548e-07, "loss": 0.4615, "step": 14604 }, { "epoch": 1.86, "grad_norm": 0.6583750245798526, "learning_rate": 1.2659314594039408e-07, "loss": 0.4267, "step": 14605 }, { "epoch": 1.86, "grad_norm": 0.6679084694987021, "learning_rate": 1.2636258868201912e-07, "loss": 0.4153, "step": 14606 }, { "epoch": 1.86, "grad_norm": 0.6923999599355295, "learning_rate": 1.2613223887881497e-07, "loss": 0.4522, "step": 14607 }, { "epoch": 1.86, "grad_norm": 0.6789242760780523, "learning_rate": 1.259020965405866e-07, "loss": 0.4637, "step": 14608 }, { "epoch": 1.86, "grad_norm": 2.9235194670770213, "learning_rate": 1.2567216167713115e-07, "loss": 0.5366, "step": 14609 }, { "epoch": 1.86, "grad_norm": 0.7152871563768175, "learning_rate": 1.2544243429823467e-07, "loss": 0.4941, "step": 14610 }, { "epoch": 1.86, "grad_norm": 0.6243540079591607, "learning_rate": 1.2521291441367766e-07, "loss": 0.4494, "step": 14611 }, { "epoch": 1.86, "grad_norm": 0.6958112757075704, "learning_rate": 1.24983602033229e-07, "loss": 0.4868, "step": 14612 }, { "epoch": 1.86, "grad_norm": 0.7083058652321498, "learning_rate": 1.2475449716665033e-07, "loss": 0.4601, "step": 14613 }, { "epoch": 1.86, "grad_norm": 0.5621427207050358, "learning_rate": 1.2452559982369273e-07, "loss": 0.4474, "step": 14614 }, { "epoch": 1.86, "grad_norm": 1.2900831833999094, "learning_rate": 1.2429691001410115e-07, "loss": 0.5252, "step": 14615 }, { "epoch": 1.86, "grad_norm": 0.790898987612816, "learning_rate": 1.2406842774760953e-07, "loss": 0.555, "step": 14616 }, { "epoch": 1.86, "grad_norm": 0.882540604637034, "learning_rate": 1.2384015303394448e-07, "loss": 0.5273, "step": 14617 }, { "epoch": 1.86, "grad_norm": 0.636225172032499, "learning_rate": 1.2361208588282104e-07, "loss": 0.4899, "step": 14618 }, { "epoch": 1.86, "grad_norm": 0.7052801821140529, "learning_rate": 1.2338422630394918e-07, "loss": 0.4697, "step": 14619 }, { "epoch": 1.86, "grad_norm": 0.7895833079543482, "learning_rate": 1.2315657430702677e-07, "loss": 0.5409, "step": 14620 }, { "epoch": 1.86, "grad_norm": 0.7668180357535186, "learning_rate": 1.2292912990174545e-07, "loss": 0.4946, "step": 14621 }, { "epoch": 1.86, "grad_norm": 0.6331665974804866, "learning_rate": 1.227018930977858e-07, "loss": 0.4135, "step": 14622 }, { "epoch": 1.86, "grad_norm": 0.6740088592984881, "learning_rate": 1.224748639048212e-07, "loss": 0.4493, "step": 14623 }, { "epoch": 1.86, "grad_norm": 0.6969125898696111, "learning_rate": 1.2224804233251508e-07, "loss": 0.4884, "step": 14624 }, { "epoch": 1.86, "grad_norm": 0.7569954787407913, "learning_rate": 1.22021428390523e-07, "loss": 0.4601, "step": 14625 }, { "epoch": 1.86, "grad_norm": 0.6146385304382157, "learning_rate": 1.2179502208848947e-07, "loss": 0.5026, "step": 14626 }, { "epoch": 1.86, "grad_norm": 0.5787184606508474, "learning_rate": 1.2156882343605514e-07, "loss": 0.4627, "step": 14627 }, { "epoch": 1.86, "grad_norm": 0.7465740236081377, "learning_rate": 1.2134283244284617e-07, "loss": 0.4755, "step": 14628 }, { "epoch": 1.86, "grad_norm": 0.6900643821854868, "learning_rate": 1.2111704911848266e-07, "loss": 0.5231, "step": 14629 }, { "epoch": 1.86, "grad_norm": 0.7509137695026366, "learning_rate": 1.208914734725758e-07, "loss": 0.5383, "step": 14630 }, { "epoch": 1.86, "grad_norm": 0.7421307277549158, "learning_rate": 1.206661055147279e-07, "loss": 0.5599, "step": 14631 }, { "epoch": 1.86, "grad_norm": 0.6833315961151777, "learning_rate": 1.204409452545313e-07, "loss": 0.4763, "step": 14632 }, { "epoch": 1.86, "grad_norm": 0.6107178070256561, "learning_rate": 1.2021599270157113e-07, "loss": 0.4112, "step": 14633 }, { "epoch": 1.86, "grad_norm": 0.5799072262591997, "learning_rate": 1.1999124786542248e-07, "loss": 0.3764, "step": 14634 }, { "epoch": 1.86, "grad_norm": 0.577664823114061, "learning_rate": 1.1976671075565217e-07, "loss": 0.4498, "step": 14635 }, { "epoch": 1.86, "grad_norm": 0.6894000321087668, "learning_rate": 1.1954238138181805e-07, "loss": 0.5032, "step": 14636 }, { "epoch": 1.86, "grad_norm": 0.6041644158444297, "learning_rate": 1.1931825975346868e-07, "loss": 0.4538, "step": 14637 }, { "epoch": 1.86, "grad_norm": 0.5962228695868987, "learning_rate": 1.1909434588014468e-07, "loss": 0.4349, "step": 14638 }, { "epoch": 1.86, "grad_norm": 0.6396146831660878, "learning_rate": 1.1887063977137736e-07, "loss": 0.4567, "step": 14639 }, { "epoch": 1.87, "grad_norm": 0.6131768772212417, "learning_rate": 1.1864714143668909e-07, "loss": 0.4471, "step": 14640 }, { "epoch": 1.87, "grad_norm": 0.8675832151126761, "learning_rate": 1.1842385088559338e-07, "loss": 0.562, "step": 14641 }, { "epoch": 1.87, "grad_norm": 0.7402087082460594, "learning_rate": 1.182007681275954e-07, "loss": 0.5589, "step": 14642 }, { "epoch": 1.87, "grad_norm": 1.3624700153912672, "learning_rate": 1.1797789317218978e-07, "loss": 0.5006, "step": 14643 }, { "epoch": 1.87, "grad_norm": 0.7873636676316907, "learning_rate": 1.1775522602886558e-07, "loss": 0.5141, "step": 14644 }, { "epoch": 1.87, "grad_norm": 0.6897747349439108, "learning_rate": 1.1753276670709912e-07, "loss": 0.4907, "step": 14645 }, { "epoch": 1.87, "grad_norm": 0.6201502334450523, "learning_rate": 1.1731051521636171e-07, "loss": 0.5008, "step": 14646 }, { "epoch": 1.87, "grad_norm": 0.5911864940916477, "learning_rate": 1.1708847156611191e-07, "loss": 0.457, "step": 14647 }, { "epoch": 1.87, "grad_norm": 0.6173175911011777, "learning_rate": 1.1686663576580215e-07, "loss": 0.4249, "step": 14648 }, { "epoch": 1.87, "grad_norm": 0.629052251043333, "learning_rate": 1.1664500782487653e-07, "loss": 0.4243, "step": 14649 }, { "epoch": 1.87, "grad_norm": 0.5912618835924818, "learning_rate": 1.1642358775276696e-07, "loss": 0.4059, "step": 14650 }, { "epoch": 1.87, "grad_norm": 0.7113176664331865, "learning_rate": 1.162023755588998e-07, "loss": 0.4278, "step": 14651 }, { "epoch": 1.87, "grad_norm": 0.637757032255662, "learning_rate": 1.1598137125269138e-07, "loss": 0.4593, "step": 14652 }, { "epoch": 1.87, "grad_norm": 0.5729251154003422, "learning_rate": 1.1576057484354864e-07, "loss": 0.4312, "step": 14653 }, { "epoch": 1.87, "grad_norm": 0.648040926848014, "learning_rate": 1.1553998634087072e-07, "loss": 0.4394, "step": 14654 }, { "epoch": 1.87, "grad_norm": 0.6128300051972277, "learning_rate": 1.1531960575404677e-07, "loss": 0.462, "step": 14655 }, { "epoch": 1.87, "grad_norm": 0.6777619319129019, "learning_rate": 1.1509943309245874e-07, "loss": 0.4865, "step": 14656 }, { "epoch": 1.87, "grad_norm": 0.7435598296034476, "learning_rate": 1.1487946836547693e-07, "loss": 0.4649, "step": 14657 }, { "epoch": 1.87, "grad_norm": 0.7167262540743504, "learning_rate": 1.1465971158246602e-07, "loss": 0.4647, "step": 14658 }, { "epoch": 1.87, "grad_norm": 0.7603951327247286, "learning_rate": 1.1444016275277969e-07, "loss": 0.5255, "step": 14659 }, { "epoch": 1.87, "grad_norm": 0.6315569605304828, "learning_rate": 1.1422082188576434e-07, "loss": 0.4409, "step": 14660 }, { "epoch": 1.87, "grad_norm": 0.7004801004714795, "learning_rate": 1.140016889907547e-07, "loss": 0.4345, "step": 14661 }, { "epoch": 1.87, "grad_norm": 0.6830195970723086, "learning_rate": 1.1378276407708055e-07, "loss": 0.4291, "step": 14662 }, { "epoch": 1.87, "grad_norm": 0.5676068452273167, "learning_rate": 1.1356404715405945e-07, "loss": 0.3877, "step": 14663 }, { "epoch": 1.87, "grad_norm": 0.6114703550134339, "learning_rate": 1.1334553823100225e-07, "loss": 0.4477, "step": 14664 }, { "epoch": 1.87, "grad_norm": 0.5742720169857399, "learning_rate": 1.1312723731721043e-07, "loss": 0.4642, "step": 14665 }, { "epoch": 1.87, "grad_norm": 0.6013184415603765, "learning_rate": 1.1290914442197543e-07, "loss": 0.4617, "step": 14666 }, { "epoch": 1.87, "grad_norm": 0.6767833182341734, "learning_rate": 1.126912595545815e-07, "loss": 0.5107, "step": 14667 }, { "epoch": 1.87, "grad_norm": 0.6892354729171455, "learning_rate": 1.1247358272430286e-07, "loss": 0.4794, "step": 14668 }, { "epoch": 1.87, "grad_norm": 0.7783881609870613, "learning_rate": 1.1225611394040547e-07, "loss": 0.5574, "step": 14669 }, { "epoch": 1.87, "grad_norm": 0.738540489691049, "learning_rate": 1.1203885321214692e-07, "loss": 0.4607, "step": 14670 }, { "epoch": 1.87, "grad_norm": 0.8260641125064003, "learning_rate": 1.1182180054877368e-07, "loss": 0.5327, "step": 14671 }, { "epoch": 1.87, "grad_norm": 0.6833765563018915, "learning_rate": 1.1160495595952726e-07, "loss": 0.465, "step": 14672 }, { "epoch": 1.87, "grad_norm": 1.1191845648593317, "learning_rate": 1.1138831945363582e-07, "loss": 0.5383, "step": 14673 }, { "epoch": 1.87, "grad_norm": 1.1190589701848057, "learning_rate": 1.11171891040322e-07, "loss": 0.5317, "step": 14674 }, { "epoch": 1.87, "grad_norm": 0.6896450491543062, "learning_rate": 1.1095567072879842e-07, "loss": 0.4725, "step": 14675 }, { "epoch": 1.87, "grad_norm": 0.7614726824070114, "learning_rate": 1.1073965852826884e-07, "loss": 0.5379, "step": 14676 }, { "epoch": 1.87, "grad_norm": 0.8133514046180881, "learning_rate": 1.1052385444792757e-07, "loss": 0.4662, "step": 14677 }, { "epoch": 1.87, "grad_norm": 0.6634638277865175, "learning_rate": 1.1030825849696224e-07, "loss": 0.4668, "step": 14678 }, { "epoch": 1.87, "grad_norm": 0.6213807828620361, "learning_rate": 1.1009287068454832e-07, "loss": 0.433, "step": 14679 }, { "epoch": 1.87, "grad_norm": 0.60195928687676, "learning_rate": 1.0987769101985569e-07, "loss": 0.4477, "step": 14680 }, { "epoch": 1.87, "grad_norm": 0.7577741406079587, "learning_rate": 1.0966271951204255e-07, "loss": 0.4897, "step": 14681 }, { "epoch": 1.87, "grad_norm": 0.568465566560554, "learning_rate": 1.0944795617026105e-07, "loss": 0.4222, "step": 14682 }, { "epoch": 1.87, "grad_norm": 0.6275135842961441, "learning_rate": 1.0923340100365109e-07, "loss": 0.4667, "step": 14683 }, { "epoch": 1.87, "grad_norm": 0.5606126201968549, "learning_rate": 1.090190540213476e-07, "loss": 0.4498, "step": 14684 }, { "epoch": 1.87, "grad_norm": 0.6623376212850494, "learning_rate": 1.0880491523247272e-07, "loss": 0.4067, "step": 14685 }, { "epoch": 1.87, "grad_norm": 0.5941468232604635, "learning_rate": 1.0859098464614303e-07, "loss": 0.4051, "step": 14686 }, { "epoch": 1.87, "grad_norm": 0.6505161733037638, "learning_rate": 1.0837726227146406e-07, "loss": 0.447, "step": 14687 }, { "epoch": 1.87, "grad_norm": 0.8186006246132715, "learning_rate": 1.081637481175335e-07, "loss": 0.5273, "step": 14688 }, { "epoch": 1.87, "grad_norm": 0.683459121726743, "learning_rate": 1.079504421934402e-07, "loss": 0.4519, "step": 14689 }, { "epoch": 1.87, "grad_norm": 0.6298893599290772, "learning_rate": 1.0773734450826467e-07, "loss": 0.4293, "step": 14690 }, { "epoch": 1.87, "grad_norm": 0.6550236773155582, "learning_rate": 1.0752445507107633e-07, "loss": 0.5261, "step": 14691 }, { "epoch": 1.87, "grad_norm": 0.7941248519349838, "learning_rate": 1.0731177389093795e-07, "loss": 0.5244, "step": 14692 }, { "epoch": 1.87, "grad_norm": 1.092428051036131, "learning_rate": 1.0709930097690224e-07, "loss": 0.5099, "step": 14693 }, { "epoch": 1.87, "grad_norm": 0.5405784766135938, "learning_rate": 1.0688703633801422e-07, "loss": 0.4216, "step": 14694 }, { "epoch": 1.87, "grad_norm": 0.714987688349456, "learning_rate": 1.0667497998330889e-07, "loss": 0.4152, "step": 14695 }, { "epoch": 1.87, "grad_norm": 0.6216070114627814, "learning_rate": 1.064631319218129e-07, "loss": 0.5041, "step": 14696 }, { "epoch": 1.87, "grad_norm": 0.714805515110952, "learning_rate": 1.0625149216254349e-07, "loss": 0.4761, "step": 14697 }, { "epoch": 1.87, "grad_norm": 0.5676553574460849, "learning_rate": 1.0604006071451012e-07, "loss": 0.4346, "step": 14698 }, { "epoch": 1.87, "grad_norm": 0.6938465005047804, "learning_rate": 1.0582883758671225e-07, "loss": 0.5572, "step": 14699 }, { "epoch": 1.87, "grad_norm": 0.7408861632516022, "learning_rate": 1.0561782278814104e-07, "loss": 0.4973, "step": 14700 }, { "epoch": 1.87, "grad_norm": 0.6015476006737581, "learning_rate": 1.0540701632777984e-07, "loss": 0.5016, "step": 14701 }, { "epoch": 1.87, "grad_norm": 0.6605739639953594, "learning_rate": 1.0519641821460036e-07, "loss": 0.4779, "step": 14702 }, { "epoch": 1.87, "grad_norm": 0.6735744793470467, "learning_rate": 1.0498602845756822e-07, "loss": 0.4826, "step": 14703 }, { "epoch": 1.87, "grad_norm": 0.7096902844641421, "learning_rate": 1.0477584706563793e-07, "loss": 0.4553, "step": 14704 }, { "epoch": 1.87, "grad_norm": 0.7894137139580715, "learning_rate": 1.0456587404775787e-07, "loss": 0.4605, "step": 14705 }, { "epoch": 1.87, "grad_norm": 0.6019756563857315, "learning_rate": 1.0435610941286422e-07, "loss": 0.4003, "step": 14706 }, { "epoch": 1.87, "grad_norm": 0.5788462084372067, "learning_rate": 1.0414655316988709e-07, "loss": 0.4634, "step": 14707 }, { "epoch": 1.87, "grad_norm": 0.8200400495659477, "learning_rate": 1.0393720532774598e-07, "loss": 0.4968, "step": 14708 }, { "epoch": 1.87, "grad_norm": 0.6048910480715624, "learning_rate": 1.0372806589535267e-07, "loss": 0.5022, "step": 14709 }, { "epoch": 1.87, "grad_norm": 0.8073597234071778, "learning_rate": 1.0351913488160948e-07, "loss": 0.5373, "step": 14710 }, { "epoch": 1.87, "grad_norm": 0.7558099454352386, "learning_rate": 1.0331041229540928e-07, "loss": 0.4813, "step": 14711 }, { "epoch": 1.87, "grad_norm": 0.654071366221137, "learning_rate": 1.0310189814563831e-07, "loss": 0.3799, "step": 14712 }, { "epoch": 1.87, "grad_norm": 0.648546161177395, "learning_rate": 1.0289359244117003e-07, "loss": 0.455, "step": 14713 }, { "epoch": 1.87, "grad_norm": 0.6490084132061171, "learning_rate": 1.0268549519087345e-07, "loss": 0.4462, "step": 14714 }, { "epoch": 1.87, "grad_norm": 0.6380297898537747, "learning_rate": 1.0247760640360538e-07, "loss": 0.4434, "step": 14715 }, { "epoch": 1.87, "grad_norm": 0.6742370726522345, "learning_rate": 1.022699260882154e-07, "loss": 0.4884, "step": 14716 }, { "epoch": 1.87, "grad_norm": 0.7129588365225235, "learning_rate": 1.0206245425354422e-07, "loss": 0.4929, "step": 14717 }, { "epoch": 1.88, "grad_norm": 0.61279099131829, "learning_rate": 1.01855190908422e-07, "loss": 0.4603, "step": 14718 }, { "epoch": 1.88, "grad_norm": 0.8158423965453113, "learning_rate": 1.0164813606167279e-07, "loss": 0.5293, "step": 14719 }, { "epoch": 1.88, "grad_norm": 0.6432943421131802, "learning_rate": 1.0144128972210899e-07, "loss": 0.463, "step": 14720 }, { "epoch": 1.88, "grad_norm": 0.7726479433399925, "learning_rate": 1.0123465189853688e-07, "loss": 0.4622, "step": 14721 }, { "epoch": 1.88, "grad_norm": 0.608519224008473, "learning_rate": 1.0102822259975054e-07, "loss": 0.4636, "step": 14722 }, { "epoch": 1.88, "grad_norm": 0.7370459494787885, "learning_rate": 1.0082200183453795e-07, "loss": 0.517, "step": 14723 }, { "epoch": 1.88, "grad_norm": 0.5854832191720103, "learning_rate": 1.0061598961167707e-07, "loss": 0.4536, "step": 14724 }, { "epoch": 1.88, "grad_norm": 0.6124150378860596, "learning_rate": 1.0041018593993757e-07, "loss": 0.4236, "step": 14725 }, { "epoch": 1.88, "grad_norm": 0.6544334200855861, "learning_rate": 1.0020459082807966e-07, "loss": 0.4625, "step": 14726 }, { "epoch": 1.88, "grad_norm": 0.5748312285010804, "learning_rate": 9.999920428485466e-08, "loss": 0.4787, "step": 14727 }, { "epoch": 1.88, "grad_norm": 0.5670750079502563, "learning_rate": 9.979402631900559e-08, "loss": 0.4851, "step": 14728 }, { "epoch": 1.88, "grad_norm": 0.6938260903397196, "learning_rate": 9.958905693926602e-08, "loss": 0.5055, "step": 14729 }, { "epoch": 1.88, "grad_norm": 0.6535301392057608, "learning_rate": 9.938429615436062e-08, "loss": 0.3828, "step": 14730 }, { "epoch": 1.88, "grad_norm": 0.6338209403966464, "learning_rate": 9.917974397300578e-08, "loss": 0.4495, "step": 14731 }, { "epoch": 1.88, "grad_norm": 0.852007001393612, "learning_rate": 9.897540040390841e-08, "loss": 0.5141, "step": 14732 }, { "epoch": 1.88, "grad_norm": 0.5635582621690459, "learning_rate": 9.87712654557671e-08, "loss": 0.4778, "step": 14733 }, { "epoch": 1.88, "grad_norm": 0.6057575885945262, "learning_rate": 9.85673391372699e-08, "loss": 0.4539, "step": 14734 }, { "epoch": 1.88, "grad_norm": 0.784644875738886, "learning_rate": 9.836362145709933e-08, "loss": 0.5547, "step": 14735 }, { "epoch": 1.88, "grad_norm": 0.6908371729735631, "learning_rate": 9.816011242392565e-08, "loss": 0.5018, "step": 14736 }, { "epoch": 1.88, "grad_norm": 0.6280549959950802, "learning_rate": 9.79568120464125e-08, "loss": 0.4267, "step": 14737 }, { "epoch": 1.88, "grad_norm": 0.6447716596651293, "learning_rate": 9.775372033321185e-08, "loss": 0.4406, "step": 14738 }, { "epoch": 1.88, "grad_norm": 0.5845421670700026, "learning_rate": 9.755083729297122e-08, "loss": 0.4278, "step": 14739 }, { "epoch": 1.88, "grad_norm": 0.6147421978953678, "learning_rate": 9.734816293432481e-08, "loss": 0.4798, "step": 14740 }, { "epoch": 1.88, "grad_norm": 0.6630379805970983, "learning_rate": 9.714569726590073e-08, "loss": 0.4547, "step": 14741 }, { "epoch": 1.88, "grad_norm": 0.6427515671936322, "learning_rate": 9.694344029631708e-08, "loss": 0.4655, "step": 14742 }, { "epoch": 1.88, "grad_norm": 0.8147712839415289, "learning_rate": 9.674139203418309e-08, "loss": 0.5164, "step": 14743 }, { "epoch": 1.88, "grad_norm": 0.7467750649822994, "learning_rate": 9.653955248809966e-08, "loss": 0.5331, "step": 14744 }, { "epoch": 1.88, "grad_norm": 0.6520523399915478, "learning_rate": 9.633792166665822e-08, "loss": 0.4832, "step": 14745 }, { "epoch": 1.88, "grad_norm": 0.5829453782898939, "learning_rate": 9.61364995784414e-08, "loss": 0.4108, "step": 14746 }, { "epoch": 1.88, "grad_norm": 0.703537691163929, "learning_rate": 9.593528623202397e-08, "loss": 0.4112, "step": 14747 }, { "epoch": 1.88, "grad_norm": 0.6506339309765253, "learning_rate": 9.573428163596965e-08, "loss": 0.4731, "step": 14748 }, { "epoch": 1.88, "grad_norm": 0.9019500792948892, "learning_rate": 9.553348579883547e-08, "loss": 0.5542, "step": 14749 }, { "epoch": 1.88, "grad_norm": 1.3540133244756363, "learning_rate": 9.53328987291674e-08, "loss": 0.5477, "step": 14750 }, { "epoch": 1.88, "grad_norm": 0.7994789667579858, "learning_rate": 9.51325204355058e-08, "loss": 0.48, "step": 14751 }, { "epoch": 1.88, "grad_norm": 0.5859519652927297, "learning_rate": 9.493235092637831e-08, "loss": 0.4512, "step": 14752 }, { "epoch": 1.88, "grad_norm": 0.5666603189035199, "learning_rate": 9.473239021030701e-08, "loss": 0.4448, "step": 14753 }, { "epoch": 1.88, "grad_norm": 0.6233648746856285, "learning_rate": 9.453263829580228e-08, "loss": 0.4894, "step": 14754 }, { "epoch": 1.88, "grad_norm": 0.7441975947287928, "learning_rate": 9.433309519136846e-08, "loss": 0.461, "step": 14755 }, { "epoch": 1.88, "grad_norm": 0.5811393594191452, "learning_rate": 9.413376090549764e-08, "loss": 0.42, "step": 14756 }, { "epoch": 1.88, "grad_norm": 0.6046986136162347, "learning_rate": 9.393463544667635e-08, "loss": 0.43, "step": 14757 }, { "epoch": 1.88, "grad_norm": 0.6023373860326642, "learning_rate": 9.373571882337951e-08, "loss": 0.4543, "step": 14758 }, { "epoch": 1.88, "grad_norm": 0.6837721897423201, "learning_rate": 9.35370110440753e-08, "loss": 0.5148, "step": 14759 }, { "epoch": 1.88, "grad_norm": 0.8108834947507921, "learning_rate": 9.333851211722145e-08, "loss": 0.5419, "step": 14760 }, { "epoch": 1.88, "grad_norm": 0.7932795188605657, "learning_rate": 9.314022205126783e-08, "loss": 0.4313, "step": 14761 }, { "epoch": 1.88, "grad_norm": 0.6757557824514098, "learning_rate": 9.294214085465436e-08, "loss": 0.5215, "step": 14762 }, { "epoch": 1.88, "grad_norm": 0.8678210844675458, "learning_rate": 9.274426853581375e-08, "loss": 0.5162, "step": 14763 }, { "epoch": 1.88, "grad_norm": 0.7375845305789939, "learning_rate": 9.25466051031687e-08, "loss": 0.4754, "step": 14764 }, { "epoch": 1.88, "grad_norm": 0.6668936778896333, "learning_rate": 9.234915056513194e-08, "loss": 0.4773, "step": 14765 }, { "epoch": 1.88, "grad_norm": 0.7796994896500511, "learning_rate": 9.21519049301095e-08, "loss": 0.5042, "step": 14766 }, { "epoch": 1.88, "grad_norm": 0.660004359978936, "learning_rate": 9.195486820649746e-08, "loss": 0.4455, "step": 14767 }, { "epoch": 1.88, "grad_norm": 0.5813106536739178, "learning_rate": 9.175804040268299e-08, "loss": 0.4383, "step": 14768 }, { "epoch": 1.88, "grad_norm": 0.7549103316378822, "learning_rate": 9.156142152704328e-08, "loss": 0.4935, "step": 14769 }, { "epoch": 1.88, "grad_norm": 0.586662579142665, "learning_rate": 9.136501158794942e-08, "loss": 0.4043, "step": 14770 }, { "epoch": 1.88, "grad_norm": 0.639145714843358, "learning_rate": 9.116881059376137e-08, "loss": 0.4967, "step": 14771 }, { "epoch": 1.88, "grad_norm": 0.7158502509925139, "learning_rate": 9.097281855283025e-08, "loss": 0.4885, "step": 14772 }, { "epoch": 1.88, "grad_norm": 0.8346594002144846, "learning_rate": 9.077703547349936e-08, "loss": 0.5163, "step": 14773 }, { "epoch": 1.88, "grad_norm": 0.5706021464773952, "learning_rate": 9.058146136410262e-08, "loss": 0.4856, "step": 14774 }, { "epoch": 1.88, "grad_norm": 0.750799021219796, "learning_rate": 9.038609623296446e-08, "loss": 0.508, "step": 14775 }, { "epoch": 1.88, "grad_norm": 0.5752430646992269, "learning_rate": 9.019094008840213e-08, "loss": 0.4752, "step": 14776 }, { "epoch": 1.88, "grad_norm": 0.7068126671951016, "learning_rate": 8.999599293872118e-08, "loss": 0.4709, "step": 14777 }, { "epoch": 1.88, "grad_norm": 0.5856580475770998, "learning_rate": 8.980125479222112e-08, "loss": 0.4241, "step": 14778 }, { "epoch": 1.88, "grad_norm": 0.6008660598538302, "learning_rate": 8.960672565719031e-08, "loss": 0.4223, "step": 14779 }, { "epoch": 1.88, "grad_norm": 0.6578533751535685, "learning_rate": 8.941240554191044e-08, "loss": 0.4845, "step": 14780 }, { "epoch": 1.88, "grad_norm": 0.7604480289623786, "learning_rate": 8.921829445465213e-08, "loss": 0.5316, "step": 14781 }, { "epoch": 1.88, "grad_norm": 0.6539851619318845, "learning_rate": 8.902439240367877e-08, "loss": 0.4726, "step": 14782 }, { "epoch": 1.88, "grad_norm": 0.7228781295936539, "learning_rate": 8.883069939724321e-08, "loss": 0.5037, "step": 14783 }, { "epoch": 1.88, "grad_norm": 0.7893378711041156, "learning_rate": 8.86372154435916e-08, "loss": 0.5136, "step": 14784 }, { "epoch": 1.88, "grad_norm": 0.9607809017873757, "learning_rate": 8.844394055095851e-08, "loss": 0.5398, "step": 14785 }, { "epoch": 1.88, "grad_norm": 0.7828001483701519, "learning_rate": 8.825087472757232e-08, "loss": 0.5375, "step": 14786 }, { "epoch": 1.88, "grad_norm": 0.5986178688612013, "learning_rate": 8.805801798165037e-08, "loss": 0.4361, "step": 14787 }, { "epoch": 1.88, "grad_norm": 0.5727285481712481, "learning_rate": 8.786537032140218e-08, "loss": 0.4545, "step": 14788 }, { "epoch": 1.88, "grad_norm": 0.5848018673653969, "learning_rate": 8.767293175502845e-08, "loss": 0.3934, "step": 14789 }, { "epoch": 1.88, "grad_norm": 0.6993970402892437, "learning_rate": 8.748070229072092e-08, "loss": 0.4348, "step": 14790 }, { "epoch": 1.88, "grad_norm": 0.6188515571506418, "learning_rate": 8.728868193666084e-08, "loss": 0.4748, "step": 14791 }, { "epoch": 1.88, "grad_norm": 0.644140819700754, "learning_rate": 8.70968707010239e-08, "loss": 0.471, "step": 14792 }, { "epoch": 1.88, "grad_norm": 0.6418842838512829, "learning_rate": 8.690526859197301e-08, "loss": 0.5217, "step": 14793 }, { "epoch": 1.88, "grad_norm": 0.886560575433917, "learning_rate": 8.671387561766498e-08, "loss": 0.5582, "step": 14794 }, { "epoch": 1.88, "grad_norm": 0.8328335555261391, "learning_rate": 8.652269178624661e-08, "loss": 0.5269, "step": 14795 }, { "epoch": 1.88, "grad_norm": 0.7896752470324832, "learning_rate": 8.633171710585642e-08, "loss": 0.4871, "step": 14796 }, { "epoch": 1.89, "grad_norm": 0.7297540529528658, "learning_rate": 8.614095158462232e-08, "loss": 0.4938, "step": 14797 }, { "epoch": 1.89, "grad_norm": 0.7961457277217313, "learning_rate": 8.595039523066617e-08, "loss": 0.4896, "step": 14798 }, { "epoch": 1.89, "grad_norm": 0.6387419842114922, "learning_rate": 8.57600480520987e-08, "loss": 0.4535, "step": 14799 }, { "epoch": 1.89, "grad_norm": 0.7279711250967081, "learning_rate": 8.556991005702175e-08, "loss": 0.4712, "step": 14800 }, { "epoch": 1.89, "grad_norm": 0.6074583864569182, "learning_rate": 8.537998125352943e-08, "loss": 0.4436, "step": 14801 }, { "epoch": 1.89, "grad_norm": 0.6556853317452656, "learning_rate": 8.51902616497069e-08, "loss": 0.4472, "step": 14802 }, { "epoch": 1.89, "grad_norm": 0.755231224380534, "learning_rate": 8.500075125362938e-08, "loss": 0.5173, "step": 14803 }, { "epoch": 1.89, "grad_norm": 0.5964805822666424, "learning_rate": 8.481145007336378e-08, "loss": 0.4992, "step": 14804 }, { "epoch": 1.89, "grad_norm": 0.7092381326031915, "learning_rate": 8.462235811696807e-08, "loss": 0.5076, "step": 14805 }, { "epoch": 1.89, "grad_norm": 0.8990138206896937, "learning_rate": 8.443347539249137e-08, "loss": 0.4968, "step": 14806 }, { "epoch": 1.89, "grad_norm": 0.8477451442219737, "learning_rate": 8.424480190797335e-08, "loss": 0.5376, "step": 14807 }, { "epoch": 1.89, "grad_norm": 0.8254601002242311, "learning_rate": 8.405633767144594e-08, "loss": 0.5439, "step": 14808 }, { "epoch": 1.89, "grad_norm": 0.7054681986899233, "learning_rate": 8.386808269093105e-08, "loss": 0.4978, "step": 14809 }, { "epoch": 1.89, "grad_norm": 0.6646116362506767, "learning_rate": 8.368003697444227e-08, "loss": 0.4834, "step": 14810 }, { "epoch": 1.89, "grad_norm": 0.8430945408776408, "learning_rate": 8.349220052998375e-08, "loss": 0.49, "step": 14811 }, { "epoch": 1.89, "grad_norm": 0.6714925359350091, "learning_rate": 8.330457336555131e-08, "loss": 0.5261, "step": 14812 }, { "epoch": 1.89, "grad_norm": 0.7502689741550561, "learning_rate": 8.311715548913135e-08, "loss": 0.5429, "step": 14813 }, { "epoch": 1.89, "grad_norm": 0.8274210569876382, "learning_rate": 8.292994690870304e-08, "loss": 0.4932, "step": 14814 }, { "epoch": 1.89, "grad_norm": 0.5762236941929715, "learning_rate": 8.274294763223334e-08, "loss": 0.4202, "step": 14815 }, { "epoch": 1.89, "grad_norm": 0.6676229580015268, "learning_rate": 8.25561576676831e-08, "loss": 0.5177, "step": 14816 }, { "epoch": 1.89, "grad_norm": 0.77475697207206, "learning_rate": 8.236957702300375e-08, "loss": 0.4703, "step": 14817 }, { "epoch": 1.89, "grad_norm": 0.5634112209306116, "learning_rate": 8.21832057061367e-08, "loss": 0.4391, "step": 14818 }, { "epoch": 1.89, "grad_norm": 0.5723140581099885, "learning_rate": 8.19970437250156e-08, "loss": 0.4393, "step": 14819 }, { "epoch": 1.89, "grad_norm": 0.6743016589677784, "learning_rate": 8.181109108756468e-08, "loss": 0.4967, "step": 14820 }, { "epoch": 1.89, "grad_norm": 0.6693265421173604, "learning_rate": 8.162534780169929e-08, "loss": 0.4703, "step": 14821 }, { "epoch": 1.89, "grad_norm": 0.6336330505418651, "learning_rate": 8.143981387532585e-08, "loss": 0.4105, "step": 14822 }, { "epoch": 1.89, "grad_norm": 0.7253324004756612, "learning_rate": 8.125448931634195e-08, "loss": 0.4973, "step": 14823 }, { "epoch": 1.89, "grad_norm": 0.7106204434980367, "learning_rate": 8.106937413263683e-08, "loss": 0.5509, "step": 14824 }, { "epoch": 1.89, "grad_norm": 0.8107539030898208, "learning_rate": 8.088446833208974e-08, "loss": 0.5167, "step": 14825 }, { "epoch": 1.89, "grad_norm": 0.8889664539822925, "learning_rate": 8.069977192257106e-08, "loss": 0.568, "step": 14826 }, { "epoch": 1.89, "grad_norm": 0.7446531195314722, "learning_rate": 8.051528491194394e-08, "loss": 0.5101, "step": 14827 }, { "epoch": 1.89, "grad_norm": 0.7501849284069014, "learning_rate": 8.033100730805987e-08, "loss": 0.5625, "step": 14828 }, { "epoch": 1.89, "grad_norm": 0.8189009478244745, "learning_rate": 8.01469391187648e-08, "loss": 0.5035, "step": 14829 }, { "epoch": 1.89, "grad_norm": 0.6067542680451289, "learning_rate": 7.996308035189249e-08, "loss": 0.4813, "step": 14830 }, { "epoch": 1.89, "grad_norm": 1.2060502950645589, "learning_rate": 7.977943101526941e-08, "loss": 0.4853, "step": 14831 }, { "epoch": 1.89, "grad_norm": 0.5959618305607192, "learning_rate": 7.959599111671324e-08, "loss": 0.4753, "step": 14832 }, { "epoch": 1.89, "grad_norm": 0.6684647711936785, "learning_rate": 7.94127606640327e-08, "loss": 0.5194, "step": 14833 }, { "epoch": 1.89, "grad_norm": 0.7355758206753046, "learning_rate": 7.922973966502657e-08, "loss": 0.5188, "step": 14834 }, { "epoch": 1.89, "grad_norm": 0.5868439006034994, "learning_rate": 7.904692812748638e-08, "loss": 0.4008, "step": 14835 }, { "epoch": 1.89, "grad_norm": 0.6442299143831768, "learning_rate": 7.886432605919314e-08, "loss": 0.4743, "step": 14836 }, { "epoch": 1.89, "grad_norm": 0.7205002482045677, "learning_rate": 7.868193346791953e-08, "loss": 0.467, "step": 14837 }, { "epoch": 1.89, "grad_norm": 0.6235250592703355, "learning_rate": 7.849975036142987e-08, "loss": 0.4813, "step": 14838 }, { "epoch": 1.89, "grad_norm": 0.6474316765963949, "learning_rate": 7.83177767474791e-08, "loss": 0.4555, "step": 14839 }, { "epoch": 1.89, "grad_norm": 0.6795953748696669, "learning_rate": 7.81360126338132e-08, "loss": 0.4224, "step": 14840 }, { "epoch": 1.89, "grad_norm": 0.7418701915988909, "learning_rate": 7.795445802816937e-08, "loss": 0.501, "step": 14841 }, { "epoch": 1.89, "grad_norm": 0.7453863426736327, "learning_rate": 7.777311293827527e-08, "loss": 0.4634, "step": 14842 }, { "epoch": 1.89, "grad_norm": 0.6373181221957444, "learning_rate": 7.759197737185087e-08, "loss": 0.4512, "step": 14843 }, { "epoch": 1.89, "grad_norm": 0.6806120324380047, "learning_rate": 7.741105133660554e-08, "loss": 0.5099, "step": 14844 }, { "epoch": 1.89, "grad_norm": 0.796011258123971, "learning_rate": 7.723033484024257e-08, "loss": 0.5369, "step": 14845 }, { "epoch": 1.89, "grad_norm": 1.2717915207966952, "learning_rate": 7.704982789045245e-08, "loss": 0.504, "step": 14846 }, { "epoch": 1.89, "grad_norm": 0.6708012942415421, "learning_rate": 7.686953049492019e-08, "loss": 0.4801, "step": 14847 }, { "epoch": 1.89, "grad_norm": 0.7001904690523313, "learning_rate": 7.668944266131906e-08, "loss": 0.506, "step": 14848 }, { "epoch": 1.89, "grad_norm": 0.6905171134874772, "learning_rate": 7.650956439731627e-08, "loss": 0.4703, "step": 14849 }, { "epoch": 1.89, "grad_norm": 0.7623682225584905, "learning_rate": 7.632989571056793e-08, "loss": 0.4664, "step": 14850 }, { "epoch": 1.89, "grad_norm": 0.6293899141519566, "learning_rate": 7.61504366087229e-08, "loss": 0.5004, "step": 14851 }, { "epoch": 1.89, "grad_norm": 0.7030762750805454, "learning_rate": 7.597118709941842e-08, "loss": 0.4854, "step": 14852 }, { "epoch": 1.89, "grad_norm": 0.5624528895891809, "learning_rate": 7.579214719028616e-08, "loss": 0.4139, "step": 14853 }, { "epoch": 1.89, "grad_norm": 0.6759732162286095, "learning_rate": 7.561331688894613e-08, "loss": 0.4491, "step": 14854 }, { "epoch": 1.89, "grad_norm": 0.7068254433890429, "learning_rate": 7.543469620301169e-08, "loss": 0.4516, "step": 14855 }, { "epoch": 1.89, "grad_norm": 0.5464915036822979, "learning_rate": 7.525628514008565e-08, "loss": 0.4111, "step": 14856 }, { "epoch": 1.89, "grad_norm": 0.714751890377659, "learning_rate": 7.507808370776193e-08, "loss": 0.4637, "step": 14857 }, { "epoch": 1.89, "grad_norm": 0.8156478551970631, "learning_rate": 7.490009191362668e-08, "loss": 0.5232, "step": 14858 }, { "epoch": 1.89, "grad_norm": 1.8374209586716308, "learning_rate": 7.472230976525608e-08, "loss": 0.5135, "step": 14859 }, { "epoch": 1.89, "grad_norm": 0.8245966082865018, "learning_rate": 7.454473727021738e-08, "loss": 0.5141, "step": 14860 }, { "epoch": 1.89, "grad_norm": 0.7696905307472008, "learning_rate": 7.436737443607011e-08, "loss": 0.5076, "step": 14861 }, { "epoch": 1.89, "grad_norm": 0.601735908489089, "learning_rate": 7.419022127036324e-08, "loss": 0.4962, "step": 14862 }, { "epoch": 1.89, "grad_norm": 0.7430554894990316, "learning_rate": 7.40132777806385e-08, "loss": 0.5313, "step": 14863 }, { "epoch": 1.89, "grad_norm": 0.6974186270246291, "learning_rate": 7.383654397442707e-08, "loss": 0.513, "step": 14864 }, { "epoch": 1.89, "grad_norm": 0.7029349162663144, "learning_rate": 7.366001985925242e-08, "loss": 0.5002, "step": 14865 }, { "epoch": 1.89, "grad_norm": 0.601301562146909, "learning_rate": 7.348370544262795e-08, "loss": 0.3856, "step": 14866 }, { "epoch": 1.89, "grad_norm": 0.6177009900413402, "learning_rate": 7.330760073205989e-08, "loss": 0.4075, "step": 14867 }, { "epoch": 1.89, "grad_norm": 0.6759806983832748, "learning_rate": 7.313170573504336e-08, "loss": 0.4472, "step": 14868 }, { "epoch": 1.89, "grad_norm": 0.5993078577159227, "learning_rate": 7.295602045906625e-08, "loss": 0.4877, "step": 14869 }, { "epoch": 1.89, "grad_norm": 0.5920985456196145, "learning_rate": 7.278054491160646e-08, "loss": 0.4508, "step": 14870 }, { "epoch": 1.89, "grad_norm": 0.6952862719541483, "learning_rate": 7.260527910013416e-08, "loss": 0.4683, "step": 14871 }, { "epoch": 1.89, "grad_norm": 0.7753118168511096, "learning_rate": 7.243022303210945e-08, "loss": 0.5472, "step": 14872 }, { "epoch": 1.89, "grad_norm": 0.7649375598868541, "learning_rate": 7.225537671498362e-08, "loss": 0.4953, "step": 14873 }, { "epoch": 1.89, "grad_norm": 0.8121907386057656, "learning_rate": 7.208074015619903e-08, "loss": 0.5073, "step": 14874 }, { "epoch": 1.9, "grad_norm": 0.8317750455439437, "learning_rate": 7.190631336319087e-08, "loss": 0.5579, "step": 14875 }, { "epoch": 1.9, "grad_norm": 0.5605345486790487, "learning_rate": 7.173209634338263e-08, "loss": 0.4322, "step": 14876 }, { "epoch": 1.9, "grad_norm": 0.6992067321547882, "learning_rate": 7.155808910419116e-08, "loss": 0.5222, "step": 14877 }, { "epoch": 1.9, "grad_norm": 0.7449484326180169, "learning_rate": 7.138429165302219e-08, "loss": 0.4441, "step": 14878 }, { "epoch": 1.9, "grad_norm": 0.744594803155509, "learning_rate": 7.121070399727426e-08, "loss": 0.4661, "step": 14879 }, { "epoch": 1.9, "grad_norm": 0.7921201948449712, "learning_rate": 7.103732614433645e-08, "loss": 0.5443, "step": 14880 }, { "epoch": 1.9, "grad_norm": 0.67321367288946, "learning_rate": 7.086415810158953e-08, "loss": 0.4548, "step": 14881 }, { "epoch": 1.9, "grad_norm": 0.6360461675931228, "learning_rate": 7.069119987640371e-08, "loss": 0.4775, "step": 14882 }, { "epoch": 1.9, "grad_norm": 0.6048693362888725, "learning_rate": 7.0518451476142e-08, "loss": 0.4741, "step": 14883 }, { "epoch": 1.9, "grad_norm": 0.7513698584418961, "learning_rate": 7.034591290815684e-08, "loss": 0.4719, "step": 14884 }, { "epoch": 1.9, "grad_norm": 0.6660888498586137, "learning_rate": 7.017358417979404e-08, "loss": 0.5427, "step": 14885 }, { "epoch": 1.9, "grad_norm": 0.7756851409683403, "learning_rate": 7.00014652983877e-08, "loss": 0.525, "step": 14886 }, { "epoch": 1.9, "grad_norm": 0.8958907292383768, "learning_rate": 6.982955627126475e-08, "loss": 0.5069, "step": 14887 }, { "epoch": 1.9, "grad_norm": 0.6261890141561876, "learning_rate": 6.96578571057438e-08, "loss": 0.4678, "step": 14888 }, { "epoch": 1.9, "grad_norm": 0.7116540305907778, "learning_rate": 6.948636780913176e-08, "loss": 0.5175, "step": 14889 }, { "epoch": 1.9, "grad_norm": 0.7349857587727427, "learning_rate": 6.931508838873057e-08, "loss": 0.5514, "step": 14890 }, { "epoch": 1.9, "grad_norm": 0.7711467481676206, "learning_rate": 6.914401885182886e-08, "loss": 0.5192, "step": 14891 }, { "epoch": 1.9, "grad_norm": 0.7468347123732433, "learning_rate": 6.897315920571024e-08, "loss": 0.4779, "step": 14892 }, { "epoch": 1.9, "grad_norm": 0.5939359026442107, "learning_rate": 6.880250945764611e-08, "loss": 0.4725, "step": 14893 }, { "epoch": 1.9, "grad_norm": 0.7012443624208674, "learning_rate": 6.863206961490177e-08, "loss": 0.4983, "step": 14894 }, { "epoch": 1.9, "grad_norm": 0.8231531785860172, "learning_rate": 6.846183968473196e-08, "loss": 0.5541, "step": 14895 }, { "epoch": 1.9, "grad_norm": 0.8626442444077446, "learning_rate": 6.829181967438258e-08, "loss": 0.5373, "step": 14896 }, { "epoch": 1.9, "grad_norm": 0.755896227945002, "learning_rate": 6.812200959109117e-08, "loss": 0.5985, "step": 14897 }, { "epoch": 1.9, "grad_norm": 0.6321918320680883, "learning_rate": 6.795240944208525e-08, "loss": 0.4684, "step": 14898 }, { "epoch": 1.9, "grad_norm": 0.6273417910591694, "learning_rate": 6.778301923458463e-08, "loss": 0.4014, "step": 14899 }, { "epoch": 1.9, "grad_norm": 0.6565728344637505, "learning_rate": 6.761383897580076e-08, "loss": 0.4833, "step": 14900 }, { "epoch": 1.9, "grad_norm": 0.5948577474124414, "learning_rate": 6.744486867293287e-08, "loss": 0.4645, "step": 14901 }, { "epoch": 1.9, "grad_norm": 0.6056332279335119, "learning_rate": 6.727610833317577e-08, "loss": 0.453, "step": 14902 }, { "epoch": 1.9, "grad_norm": 0.6536346814924354, "learning_rate": 6.710755796371149e-08, "loss": 0.4156, "step": 14903 }, { "epoch": 1.9, "grad_norm": 0.5633636995915888, "learning_rate": 6.69392175717154e-08, "loss": 0.3945, "step": 14904 }, { "epoch": 1.9, "grad_norm": 0.6444581672080565, "learning_rate": 6.677108716435288e-08, "loss": 0.5005, "step": 14905 }, { "epoch": 1.9, "grad_norm": 0.8842384076787853, "learning_rate": 6.660316674878097e-08, "loss": 0.5789, "step": 14906 }, { "epoch": 1.9, "grad_norm": 0.8955669710622807, "learning_rate": 6.643545633214676e-08, "loss": 0.5929, "step": 14907 }, { "epoch": 1.9, "grad_norm": 0.7057406373814158, "learning_rate": 6.626795592159063e-08, "loss": 0.5399, "step": 14908 }, { "epoch": 1.9, "grad_norm": 0.6901954710931674, "learning_rate": 6.610066552424132e-08, "loss": 0.5157, "step": 14909 }, { "epoch": 1.9, "grad_norm": 0.7297849341764654, "learning_rate": 6.593358514722036e-08, "loss": 0.4732, "step": 14910 }, { "epoch": 1.9, "grad_norm": 0.6946051495708048, "learning_rate": 6.576671479763874e-08, "loss": 0.4821, "step": 14911 }, { "epoch": 1.9, "grad_norm": 0.7825820905137036, "learning_rate": 6.560005448260131e-08, "loss": 0.5301, "step": 14912 }, { "epoch": 1.9, "grad_norm": 0.742055419211664, "learning_rate": 6.54336042092013e-08, "loss": 0.5373, "step": 14913 }, { "epoch": 1.9, "grad_norm": 0.7288046623711072, "learning_rate": 6.526736398452472e-08, "loss": 0.4941, "step": 14914 }, { "epoch": 1.9, "grad_norm": 0.6067676193745742, "learning_rate": 6.510133381564643e-08, "loss": 0.4817, "step": 14915 }, { "epoch": 1.9, "grad_norm": 0.6841008251296871, "learning_rate": 6.493551370963524e-08, "loss": 0.4464, "step": 14916 }, { "epoch": 1.9, "grad_norm": 0.7745490458084624, "learning_rate": 6.476990367354886e-08, "loss": 0.5038, "step": 14917 }, { "epoch": 1.9, "grad_norm": 0.8910218725210496, "learning_rate": 6.460450371443716e-08, "loss": 0.5327, "step": 14918 }, { "epoch": 1.9, "grad_norm": 0.5858515493755501, "learning_rate": 6.44393138393401e-08, "loss": 0.424, "step": 14919 }, { "epoch": 1.9, "grad_norm": 0.6675758081006506, "learning_rate": 6.427433405528983e-08, "loss": 0.4647, "step": 14920 }, { "epoch": 1.9, "grad_norm": 0.6402570021566559, "learning_rate": 6.410956436930849e-08, "loss": 0.5229, "step": 14921 }, { "epoch": 1.9, "grad_norm": 0.8041011453764587, "learning_rate": 6.394500478841048e-08, "loss": 0.5131, "step": 14922 }, { "epoch": 1.9, "grad_norm": 0.7394023307467974, "learning_rate": 6.378065531960021e-08, "loss": 0.5506, "step": 14923 }, { "epoch": 1.9, "grad_norm": 0.6291391579588682, "learning_rate": 6.361651596987372e-08, "loss": 0.4398, "step": 14924 }, { "epoch": 1.9, "grad_norm": 0.6456519105857712, "learning_rate": 6.345258674621713e-08, "loss": 0.4704, "step": 14925 }, { "epoch": 1.9, "grad_norm": 0.6973500760325702, "learning_rate": 6.328886765560983e-08, "loss": 0.4446, "step": 14926 }, { "epoch": 1.9, "grad_norm": 0.6836038328795178, "learning_rate": 6.312535870501901e-08, "loss": 0.4731, "step": 14927 }, { "epoch": 1.9, "grad_norm": 0.9010758515973167, "learning_rate": 6.296205990140636e-08, "loss": 0.5183, "step": 14928 }, { "epoch": 1.9, "grad_norm": 0.7520776656583378, "learning_rate": 6.279897125172241e-08, "loss": 0.4887, "step": 14929 }, { "epoch": 1.9, "grad_norm": 0.6516671496396322, "learning_rate": 6.263609276290883e-08, "loss": 0.4605, "step": 14930 }, { "epoch": 1.9, "grad_norm": 0.6324463501168689, "learning_rate": 6.247342444189896e-08, "loss": 0.483, "step": 14931 }, { "epoch": 1.9, "grad_norm": 0.6433603158451405, "learning_rate": 6.231096629561839e-08, "loss": 0.488, "step": 14932 }, { "epoch": 1.9, "grad_norm": 0.7005267254357026, "learning_rate": 6.214871833098046e-08, "loss": 0.5379, "step": 14933 }, { "epoch": 1.9, "grad_norm": 0.8187683073146915, "learning_rate": 6.198668055489299e-08, "loss": 0.5566, "step": 14934 }, { "epoch": 1.9, "grad_norm": 0.7014506942335231, "learning_rate": 6.182485297425211e-08, "loss": 0.5403, "step": 14935 }, { "epoch": 1.9, "grad_norm": 0.7457658166839182, "learning_rate": 6.166323559594733e-08, "loss": 0.5201, "step": 14936 }, { "epoch": 1.9, "grad_norm": 0.6720903249687875, "learning_rate": 6.150182842685814e-08, "loss": 0.4732, "step": 14937 }, { "epoch": 1.9, "grad_norm": 0.682299415129687, "learning_rate": 6.134063147385516e-08, "loss": 0.4591, "step": 14938 }, { "epoch": 1.9, "grad_norm": 0.868661915401475, "learning_rate": 6.117964474379956e-08, "loss": 0.4996, "step": 14939 }, { "epoch": 1.9, "grad_norm": 0.6311212247812544, "learning_rate": 6.101886824354475e-08, "loss": 0.4863, "step": 14940 }, { "epoch": 1.9, "grad_norm": 0.8370369943956436, "learning_rate": 6.085830197993359e-08, "loss": 0.5028, "step": 14941 }, { "epoch": 1.9, "grad_norm": 0.5602474811155145, "learning_rate": 6.069794595980116e-08, "loss": 0.4298, "step": 14942 }, { "epoch": 1.9, "grad_norm": 0.7064661389182153, "learning_rate": 6.053780018997369e-08, "loss": 0.4819, "step": 14943 }, { "epoch": 1.9, "grad_norm": 0.7494532845141635, "learning_rate": 6.037786467726791e-08, "loss": 0.5117, "step": 14944 }, { "epoch": 1.9, "grad_norm": 0.7389400123854223, "learning_rate": 6.02181394284912e-08, "loss": 0.5506, "step": 14945 }, { "epoch": 1.9, "grad_norm": 0.6554405146888587, "learning_rate": 6.005862445044308e-08, "loss": 0.4828, "step": 14946 }, { "epoch": 1.9, "grad_norm": 0.6404137315404386, "learning_rate": 5.98993197499137e-08, "loss": 0.478, "step": 14947 }, { "epoch": 1.9, "grad_norm": 0.588565027593553, "learning_rate": 5.974022533368373e-08, "loss": 0.4529, "step": 14948 }, { "epoch": 1.9, "grad_norm": 0.611928012351665, "learning_rate": 5.95813412085261e-08, "loss": 0.4215, "step": 14949 }, { "epoch": 1.9, "grad_norm": 0.5764535069932035, "learning_rate": 5.942266738120262e-08, "loss": 0.4203, "step": 14950 }, { "epoch": 1.9, "grad_norm": 0.6779313510516711, "learning_rate": 5.926420385846898e-08, "loss": 0.4522, "step": 14951 }, { "epoch": 1.9, "grad_norm": 0.5531856169733247, "learning_rate": 5.910595064706981e-08, "loss": 0.4392, "step": 14952 }, { "epoch": 1.9, "grad_norm": 0.7555395400697723, "learning_rate": 5.8947907753741373e-08, "loss": 0.4786, "step": 14953 }, { "epoch": 1.91, "grad_norm": 0.9245927681111071, "learning_rate": 5.879007518521107e-08, "loss": 0.5687, "step": 14954 }, { "epoch": 1.91, "grad_norm": 0.692630364540833, "learning_rate": 5.8632452948197974e-08, "loss": 0.4956, "step": 14955 }, { "epoch": 1.91, "grad_norm": 0.6486453717229161, "learning_rate": 5.84750410494106e-08, "loss": 0.4446, "step": 14956 }, { "epoch": 1.91, "grad_norm": 0.6074495053391784, "learning_rate": 5.83178394955497e-08, "loss": 0.4785, "step": 14957 }, { "epoch": 1.91, "grad_norm": 0.8140093752770277, "learning_rate": 5.8160848293307146e-08, "loss": 0.5183, "step": 14958 }, { "epoch": 1.91, "grad_norm": 0.9798447162845862, "learning_rate": 5.800406744936538e-08, "loss": 0.5106, "step": 14959 }, { "epoch": 1.91, "grad_norm": 0.8913133183341689, "learning_rate": 5.784749697039793e-08, "loss": 0.5728, "step": 14960 }, { "epoch": 1.91, "grad_norm": 0.6320232619030288, "learning_rate": 5.769113686307004e-08, "loss": 0.4892, "step": 14961 }, { "epoch": 1.91, "grad_norm": 0.6539798660718247, "learning_rate": 5.753498713403693e-08, "loss": 0.5035, "step": 14962 }, { "epoch": 1.91, "grad_norm": 0.8204085460008044, "learning_rate": 5.737904778994552e-08, "loss": 0.5349, "step": 14963 }, { "epoch": 1.91, "grad_norm": 0.7903255207447327, "learning_rate": 5.722331883743382e-08, "loss": 0.5413, "step": 14964 }, { "epoch": 1.91, "grad_norm": 0.8745232735446167, "learning_rate": 5.706780028313041e-08, "loss": 0.5532, "step": 14965 }, { "epoch": 1.91, "grad_norm": 0.7386897592005486, "learning_rate": 5.691249213365557e-08, "loss": 0.4201, "step": 14966 }, { "epoch": 1.91, "grad_norm": 0.568925708084756, "learning_rate": 5.6757394395620115e-08, "loss": 0.4233, "step": 14967 }, { "epoch": 1.91, "grad_norm": 0.6807609701128432, "learning_rate": 5.660250707562598e-08, "loss": 0.5163, "step": 14968 }, { "epoch": 1.91, "grad_norm": 0.8079184883582976, "learning_rate": 5.644783018026623e-08, "loss": 0.5345, "step": 14969 }, { "epoch": 1.91, "grad_norm": 0.8360313316237793, "learning_rate": 5.629336371612504e-08, "loss": 0.5522, "step": 14970 }, { "epoch": 1.91, "grad_norm": 0.5774524376393603, "learning_rate": 5.61391076897777e-08, "loss": 0.4419, "step": 14971 }, { "epoch": 1.91, "grad_norm": 0.7016965974940614, "learning_rate": 5.5985062107789536e-08, "loss": 0.512, "step": 14972 }, { "epoch": 1.91, "grad_norm": 0.7041518107213289, "learning_rate": 5.583122697671917e-08, "loss": 0.4864, "step": 14973 }, { "epoch": 1.91, "grad_norm": 0.6244062015740327, "learning_rate": 5.567760230311359e-08, "loss": 0.4542, "step": 14974 }, { "epoch": 1.91, "grad_norm": 0.7675530805668184, "learning_rate": 5.552418809351312e-08, "loss": 0.526, "step": 14975 }, { "epoch": 1.91, "grad_norm": 0.7835900608022818, "learning_rate": 5.537098435444754e-08, "loss": 0.5459, "step": 14976 }, { "epoch": 1.91, "grad_norm": 0.6591989304727085, "learning_rate": 5.521799109243886e-08, "loss": 0.4764, "step": 14977 }, { "epoch": 1.91, "grad_norm": 0.7652958379058709, "learning_rate": 5.506520831399853e-08, "loss": 0.5195, "step": 14978 }, { "epoch": 1.91, "grad_norm": 0.5939310537253913, "learning_rate": 5.4912636025630793e-08, "loss": 0.4617, "step": 14979 }, { "epoch": 1.91, "grad_norm": 0.7638793108278883, "learning_rate": 5.4760274233829345e-08, "loss": 0.4976, "step": 14980 }, { "epoch": 1.91, "grad_norm": 0.7189220206032143, "learning_rate": 5.460812294508122e-08, "loss": 0.4764, "step": 14981 }, { "epoch": 1.91, "grad_norm": 0.6032077824149986, "learning_rate": 5.445618216586124e-08, "loss": 0.4553, "step": 14982 }, { "epoch": 1.91, "grad_norm": 0.5935185788396834, "learning_rate": 5.430445190263811e-08, "loss": 0.4788, "step": 14983 }, { "epoch": 1.91, "grad_norm": 0.6128528841979521, "learning_rate": 5.4152932161870564e-08, "loss": 0.4354, "step": 14984 }, { "epoch": 1.91, "grad_norm": 0.5906547430444946, "learning_rate": 5.400162295000788e-08, "loss": 0.4285, "step": 14985 }, { "epoch": 1.91, "grad_norm": 0.726404018184276, "learning_rate": 5.3850524273490465e-08, "loss": 0.4683, "step": 14986 }, { "epoch": 1.91, "grad_norm": 1.066818894707798, "learning_rate": 5.369963613875151e-08, "loss": 0.4455, "step": 14987 }, { "epoch": 1.91, "grad_norm": 0.6065391533997923, "learning_rate": 5.3548958552211985e-08, "loss": 0.4837, "step": 14988 }, { "epoch": 1.91, "grad_norm": 0.6973372812309644, "learning_rate": 5.339849152028731e-08, "loss": 0.5204, "step": 14989 }, { "epoch": 1.91, "grad_norm": 0.6660641490366468, "learning_rate": 5.3248235049381816e-08, "loss": 0.4351, "step": 14990 }, { "epoch": 1.91, "grad_norm": 0.6330370484708898, "learning_rate": 5.3098189145891487e-08, "loss": 0.4086, "step": 14991 }, { "epoch": 1.91, "grad_norm": 0.6877549367810202, "learning_rate": 5.294835381620289e-08, "loss": 0.4741, "step": 14992 }, { "epoch": 1.91, "grad_norm": 0.7395386762927578, "learning_rate": 5.2798729066694254e-08, "loss": 0.4594, "step": 14993 }, { "epoch": 1.91, "grad_norm": 0.6349469610792317, "learning_rate": 5.2649314903734926e-08, "loss": 0.4339, "step": 14994 }, { "epoch": 1.91, "grad_norm": 0.6048604196944926, "learning_rate": 5.2500111333684824e-08, "loss": 0.489, "step": 14995 }, { "epoch": 1.91, "grad_norm": 2.462588393903298, "learning_rate": 5.235111836289497e-08, "loss": 0.5126, "step": 14996 }, { "epoch": 1.91, "grad_norm": 0.7300669452763711, "learning_rate": 5.220233599770752e-08, "loss": 0.5043, "step": 14997 }, { "epoch": 1.91, "grad_norm": 0.8306850630509922, "learning_rate": 5.205376424445574e-08, "loss": 0.5171, "step": 14998 }, { "epoch": 1.91, "grad_norm": 0.6953798015843233, "learning_rate": 5.190540310946401e-08, "loss": 0.4344, "step": 14999 }, { "epoch": 1.91, "grad_norm": 0.6316238708365783, "learning_rate": 5.175725259904729e-08, "loss": 0.4948, "step": 15000 }, { "epoch": 1.91, "grad_norm": 0.7002272569212065, "learning_rate": 5.160931271951219e-08, "loss": 0.4876, "step": 15001 }, { "epoch": 1.91, "grad_norm": 0.6384808026656835, "learning_rate": 5.1461583477155355e-08, "loss": 0.471, "step": 15002 }, { "epoch": 1.91, "grad_norm": 0.833340524793641, "learning_rate": 5.131406487826618e-08, "loss": 0.5191, "step": 15003 }, { "epoch": 1.91, "grad_norm": 0.5969853225188436, "learning_rate": 5.1166756929122986e-08, "loss": 0.4549, "step": 15004 }, { "epoch": 1.91, "grad_norm": 0.729568435645927, "learning_rate": 5.101965963599742e-08, "loss": 0.4855, "step": 15005 }, { "epoch": 1.91, "grad_norm": 0.9482010831630299, "learning_rate": 5.087277300515003e-08, "loss": 0.5189, "step": 15006 }, { "epoch": 1.91, "grad_norm": 0.7049048460185638, "learning_rate": 5.07260970428336e-08, "loss": 0.5618, "step": 15007 }, { "epoch": 1.91, "grad_norm": 0.764463645937156, "learning_rate": 5.0579631755290906e-08, "loss": 0.5066, "step": 15008 }, { "epoch": 1.91, "grad_norm": 0.6190063326157494, "learning_rate": 5.043337714875807e-08, "loss": 0.4783, "step": 15009 }, { "epoch": 1.91, "grad_norm": 0.8531655425701893, "learning_rate": 5.0287333229459554e-08, "loss": 0.5135, "step": 15010 }, { "epoch": 1.91, "grad_norm": 0.7429210881776909, "learning_rate": 5.0141500003612065e-08, "loss": 0.4789, "step": 15011 }, { "epoch": 1.91, "grad_norm": 0.5933005637242175, "learning_rate": 4.999587747742341e-08, "loss": 0.4636, "step": 15012 }, { "epoch": 1.91, "grad_norm": 0.7610721647698198, "learning_rate": 4.9850465657092505e-08, "loss": 0.4787, "step": 15013 }, { "epoch": 1.91, "grad_norm": 0.5886871025917185, "learning_rate": 4.9705264548808865e-08, "loss": 0.4779, "step": 15014 }, { "epoch": 1.91, "grad_norm": 0.6272851119334298, "learning_rate": 4.956027415875309e-08, "loss": 0.4526, "step": 15015 }, { "epoch": 1.91, "grad_norm": 0.7163857286520932, "learning_rate": 4.941549449309746e-08, "loss": 0.5051, "step": 15016 }, { "epoch": 1.91, "grad_norm": 0.6590752729257551, "learning_rate": 4.9270925558003724e-08, "loss": 0.4729, "step": 15017 }, { "epoch": 1.91, "grad_norm": 0.7508025402488908, "learning_rate": 4.9126567359626954e-08, "loss": 0.5221, "step": 15018 }, { "epoch": 1.91, "grad_norm": 0.6403443207793101, "learning_rate": 4.898241990411168e-08, "loss": 0.4134, "step": 15019 }, { "epoch": 1.91, "grad_norm": 0.6718354978813944, "learning_rate": 4.883848319759299e-08, "loss": 0.423, "step": 15020 }, { "epoch": 1.91, "grad_norm": 0.5695059014383459, "learning_rate": 4.869475724619877e-08, "loss": 0.4024, "step": 15021 }, { "epoch": 1.91, "grad_norm": 0.7188255041450545, "learning_rate": 4.855124205604633e-08, "loss": 0.4895, "step": 15022 }, { "epoch": 1.91, "grad_norm": 0.6257096493981352, "learning_rate": 4.840793763324525e-08, "loss": 0.4601, "step": 15023 }, { "epoch": 1.91, "grad_norm": 0.6756371983664469, "learning_rate": 4.826484398389564e-08, "loss": 0.4461, "step": 15024 }, { "epoch": 1.91, "grad_norm": 0.7013822120330978, "learning_rate": 4.812196111408707e-08, "loss": 0.4145, "step": 15025 }, { "epoch": 1.91, "grad_norm": 0.7339902411636045, "learning_rate": 4.7979289029903564e-08, "loss": 0.4675, "step": 15026 }, { "epoch": 1.91, "grad_norm": 0.8209441477779372, "learning_rate": 4.783682773741693e-08, "loss": 0.5542, "step": 15027 }, { "epoch": 1.91, "grad_norm": 0.9814002994776293, "learning_rate": 4.769457724269233e-08, "loss": 0.5422, "step": 15028 }, { "epoch": 1.91, "grad_norm": 0.7354161644462337, "learning_rate": 4.755253755178324e-08, "loss": 0.5991, "step": 15029 }, { "epoch": 1.91, "grad_norm": 0.7010868862589991, "learning_rate": 4.7410708670737604e-08, "loss": 0.5179, "step": 15030 }, { "epoch": 1.91, "grad_norm": 0.7516793032780991, "learning_rate": 4.7269090605591705e-08, "loss": 0.5194, "step": 15031 }, { "epoch": 1.92, "grad_norm": 0.7801187931317424, "learning_rate": 4.7127683362374054e-08, "loss": 0.4995, "step": 15032 }, { "epoch": 1.92, "grad_norm": 0.7097783882844523, "learning_rate": 4.6986486947103727e-08, "loss": 0.4646, "step": 15033 }, { "epoch": 1.92, "grad_norm": 0.9470278723585889, "learning_rate": 4.684550136579091e-08, "loss": 0.4892, "step": 15034 }, { "epoch": 1.92, "grad_norm": 0.6596478687463553, "learning_rate": 4.670472662443692e-08, "loss": 0.5235, "step": 15035 }, { "epoch": 1.92, "grad_norm": 0.739220387836991, "learning_rate": 4.6564162729034744e-08, "loss": 0.5087, "step": 15036 }, { "epoch": 1.92, "grad_norm": 0.8196384778955311, "learning_rate": 4.642380968556681e-08, "loss": 0.4545, "step": 15037 }, { "epoch": 1.92, "grad_norm": 0.5335238858695424, "learning_rate": 4.628366750000834e-08, "loss": 0.431, "step": 15038 }, { "epoch": 1.92, "grad_norm": 0.6496730205189513, "learning_rate": 4.6143736178324015e-08, "loss": 0.419, "step": 15039 }, { "epoch": 1.92, "grad_norm": 0.5645535137277602, "learning_rate": 4.600401572647073e-08, "loss": 0.4337, "step": 15040 }, { "epoch": 1.92, "grad_norm": 0.7098938481474737, "learning_rate": 4.586450615039595e-08, "loss": 0.507, "step": 15041 }, { "epoch": 1.92, "grad_norm": 0.9347313033029958, "learning_rate": 4.57252074560377e-08, "loss": 0.4892, "step": 15042 }, { "epoch": 1.92, "grad_norm": 0.6198634262664375, "learning_rate": 4.558611964932569e-08, "loss": 0.4315, "step": 15043 }, { "epoch": 1.92, "grad_norm": 0.8177453484272174, "learning_rate": 4.544724273618128e-08, "loss": 0.4651, "step": 15044 }, { "epoch": 1.92, "grad_norm": 0.6028854444891685, "learning_rate": 4.530857672251421e-08, "loss": 0.4454, "step": 15045 }, { "epoch": 1.92, "grad_norm": 0.7389051200227712, "learning_rate": 4.517012161422918e-08, "loss": 0.5017, "step": 15046 }, { "epoch": 1.92, "grad_norm": 0.6965988236157229, "learning_rate": 4.503187741721759e-08, "loss": 0.5282, "step": 15047 }, { "epoch": 1.92, "grad_norm": 0.7812060198510323, "learning_rate": 4.489384413736586e-08, "loss": 0.4941, "step": 15048 }, { "epoch": 1.92, "grad_norm": 0.7416309313859637, "learning_rate": 4.475602178054872e-08, "loss": 0.5548, "step": 15049 }, { "epoch": 1.92, "grad_norm": 0.7493343111475596, "learning_rate": 4.4618410352633145e-08, "loss": 0.4794, "step": 15050 }, { "epoch": 1.92, "grad_norm": 0.6979806510573066, "learning_rate": 4.448100985947612e-08, "loss": 0.4792, "step": 15051 }, { "epoch": 1.92, "grad_norm": 0.6095705395258522, "learning_rate": 4.4343820306927966e-08, "loss": 0.4203, "step": 15052 }, { "epoch": 1.92, "grad_norm": 0.9921475192984033, "learning_rate": 4.420684170082679e-08, "loss": 0.5349, "step": 15053 }, { "epoch": 1.92, "grad_norm": 0.8052205774906821, "learning_rate": 4.407007404700403e-08, "loss": 0.5336, "step": 15054 }, { "epoch": 1.92, "grad_norm": 0.6002551302397064, "learning_rate": 4.393351735128115e-08, "loss": 0.4017, "step": 15055 }, { "epoch": 1.92, "grad_norm": 0.7010150690772438, "learning_rate": 4.379717161947128e-08, "loss": 0.4811, "step": 15056 }, { "epoch": 1.92, "grad_norm": 0.556608300657739, "learning_rate": 4.3661036857378106e-08, "loss": 0.4198, "step": 15057 }, { "epoch": 1.92, "grad_norm": 0.5970872968248255, "learning_rate": 4.352511307079643e-08, "loss": 0.5028, "step": 15058 }, { "epoch": 1.92, "grad_norm": 0.7168932275868205, "learning_rate": 4.3389400265511637e-08, "loss": 0.4554, "step": 15059 }, { "epoch": 1.92, "grad_norm": 0.5955553849517922, "learning_rate": 4.325389844730132e-08, "loss": 0.4662, "step": 15060 }, { "epoch": 1.92, "grad_norm": 0.7133414847752445, "learning_rate": 4.3118607621933095e-08, "loss": 0.5091, "step": 15061 }, { "epoch": 1.92, "grad_norm": 0.7830092627916168, "learning_rate": 4.298352779516568e-08, "loss": 0.549, "step": 15062 }, { "epoch": 1.92, "grad_norm": 0.9090852699373508, "learning_rate": 4.284865897274948e-08, "loss": 0.5228, "step": 15063 }, { "epoch": 1.92, "grad_norm": 0.7466767444306134, "learning_rate": 4.27140011604249e-08, "loss": 0.531, "step": 15064 }, { "epoch": 1.92, "grad_norm": 0.6971594318416546, "learning_rate": 4.257955436392347e-08, "loss": 0.5483, "step": 15065 }, { "epoch": 1.92, "grad_norm": 0.5472134728209681, "learning_rate": 4.24453185889695e-08, "loss": 0.431, "step": 15066 }, { "epoch": 1.92, "grad_norm": 0.5951914904238146, "learning_rate": 4.231129384127619e-08, "loss": 0.4303, "step": 15067 }, { "epoch": 1.92, "grad_norm": 0.6751018780895542, "learning_rate": 4.217748012654843e-08, "loss": 0.4364, "step": 15068 }, { "epoch": 1.92, "grad_norm": 0.6563827559803558, "learning_rate": 4.204387745048222e-08, "loss": 0.4583, "step": 15069 }, { "epoch": 1.92, "grad_norm": 0.6178967161015908, "learning_rate": 4.191048581876522e-08, "loss": 0.5236, "step": 15070 }, { "epoch": 1.92, "grad_norm": 0.7976546318858023, "learning_rate": 4.1777305237074575e-08, "loss": 0.6215, "step": 15071 }, { "epoch": 1.92, "grad_norm": 0.8157436366688258, "learning_rate": 4.164433571108017e-08, "loss": 0.5231, "step": 15072 }, { "epoch": 1.92, "grad_norm": 1.5973061595558957, "learning_rate": 4.151157724644195e-08, "loss": 0.5363, "step": 15073 }, { "epoch": 1.92, "grad_norm": 0.763903433970657, "learning_rate": 4.137902984881037e-08, "loss": 0.5697, "step": 15074 }, { "epoch": 1.92, "grad_norm": 0.7314755798080833, "learning_rate": 4.1246693523828154e-08, "loss": 0.5657, "step": 15075 }, { "epoch": 1.92, "grad_norm": 0.8063977317043424, "learning_rate": 4.111456827712801e-08, "loss": 0.4989, "step": 15076 }, { "epoch": 1.92, "grad_norm": 0.8485229866844652, "learning_rate": 4.098265411433488e-08, "loss": 0.4942, "step": 15077 }, { "epoch": 1.92, "grad_norm": 0.7611432400832915, "learning_rate": 4.085095104106318e-08, "loss": 0.53, "step": 15078 }, { "epoch": 1.92, "grad_norm": 0.7674628413554013, "learning_rate": 4.071945906291952e-08, "loss": 0.5077, "step": 15079 }, { "epoch": 1.92, "grad_norm": 0.601550829219238, "learning_rate": 4.0588178185500536e-08, "loss": 0.4479, "step": 15080 }, { "epoch": 1.92, "grad_norm": 0.5746068619185233, "learning_rate": 4.045710841439454e-08, "loss": 0.5372, "step": 15081 }, { "epoch": 1.92, "grad_norm": 0.774811649541241, "learning_rate": 4.032624975518151e-08, "loss": 0.5921, "step": 15082 }, { "epoch": 1.92, "grad_norm": 0.8242845686613779, "learning_rate": 4.0195602213430886e-08, "loss": 0.5477, "step": 15083 }, { "epoch": 1.92, "grad_norm": 0.6764596937218885, "learning_rate": 4.006516579470487e-08, "loss": 0.4967, "step": 15084 }, { "epoch": 1.92, "grad_norm": 0.730378408856767, "learning_rate": 3.99349405045546e-08, "loss": 0.5107, "step": 15085 }, { "epoch": 1.92, "grad_norm": 0.7001733538658945, "learning_rate": 3.98049263485234e-08, "loss": 0.4534, "step": 15086 }, { "epoch": 1.92, "grad_norm": 0.619604887408877, "learning_rate": 3.967512333214685e-08, "loss": 0.4614, "step": 15087 }, { "epoch": 1.92, "grad_norm": 0.7770469006502133, "learning_rate": 3.9545531460948304e-08, "loss": 0.5162, "step": 15088 }, { "epoch": 1.92, "grad_norm": 0.7798586321512747, "learning_rate": 3.9416150740446135e-08, "loss": 0.5644, "step": 15089 }, { "epoch": 1.92, "grad_norm": 0.7134361038689274, "learning_rate": 3.928698117614593e-08, "loss": 0.5173, "step": 15090 }, { "epoch": 1.92, "grad_norm": 0.7825613362240432, "learning_rate": 3.915802277354774e-08, "loss": 0.5679, "step": 15091 }, { "epoch": 1.92, "grad_norm": 0.7872418935931574, "learning_rate": 3.902927553813884e-08, "loss": 0.5565, "step": 15092 }, { "epoch": 1.92, "grad_norm": 0.7012484508885712, "learning_rate": 3.8900739475401515e-08, "loss": 0.4308, "step": 15093 }, { "epoch": 1.92, "grad_norm": 0.6295932237967935, "learning_rate": 3.877241459080583e-08, "loss": 0.4294, "step": 15094 }, { "epoch": 1.92, "grad_norm": 1.0678126992422639, "learning_rate": 3.864430088981464e-08, "loss": 0.4212, "step": 15095 }, { "epoch": 1.92, "grad_norm": 0.8030009640009687, "learning_rate": 3.851639837788135e-08, "loss": 0.6139, "step": 15096 }, { "epoch": 1.92, "grad_norm": 0.786407477709147, "learning_rate": 3.83887070604505e-08, "loss": 0.4984, "step": 15097 }, { "epoch": 1.92, "grad_norm": 0.6657858723741097, "learning_rate": 3.82612269429572e-08, "loss": 0.4637, "step": 15098 }, { "epoch": 1.92, "grad_norm": 0.7309336242438761, "learning_rate": 3.8133958030828197e-08, "loss": 0.485, "step": 15099 }, { "epoch": 1.92, "grad_norm": 0.6500471803160319, "learning_rate": 3.800690032948029e-08, "loss": 0.4632, "step": 15100 }, { "epoch": 1.92, "grad_norm": 0.7186290999966025, "learning_rate": 3.788005384432303e-08, "loss": 0.4621, "step": 15101 }, { "epoch": 1.92, "grad_norm": 0.6587741660298142, "learning_rate": 3.7753418580754874e-08, "loss": 0.428, "step": 15102 }, { "epoch": 1.92, "grad_norm": 0.6537558268011637, "learning_rate": 3.7626994544167074e-08, "loss": 0.5362, "step": 15103 }, { "epoch": 1.92, "grad_norm": 0.8419135376197172, "learning_rate": 3.7500781739939765e-08, "loss": 0.5577, "step": 15104 }, { "epoch": 1.92, "grad_norm": 0.6081987559826507, "learning_rate": 3.737478017344698e-08, "loss": 0.4407, "step": 15105 }, { "epoch": 1.92, "grad_norm": 0.7513813111456606, "learning_rate": 3.7248989850051096e-08, "loss": 0.5061, "step": 15106 }, { "epoch": 1.92, "grad_norm": 0.7807383807379263, "learning_rate": 3.712341077510728e-08, "loss": 0.5424, "step": 15107 }, { "epoch": 1.92, "grad_norm": 0.8415746874879882, "learning_rate": 3.69980429539607e-08, "loss": 0.5133, "step": 15108 }, { "epoch": 1.92, "grad_norm": 0.8029499437514626, "learning_rate": 3.687288639194819e-08, "loss": 0.4693, "step": 15109 }, { "epoch": 1.92, "grad_norm": 0.7330127580051987, "learning_rate": 3.674794109439661e-08, "loss": 0.4772, "step": 15110 }, { "epoch": 1.93, "grad_norm": 0.8371894643006117, "learning_rate": 3.662320706662503e-08, "loss": 0.5611, "step": 15111 }, { "epoch": 1.93, "grad_norm": 0.7621198719054024, "learning_rate": 3.64986843139431e-08, "loss": 0.5392, "step": 15112 }, { "epoch": 1.93, "grad_norm": 0.6861766372398184, "learning_rate": 3.637437284165102e-08, "loss": 0.5122, "step": 15113 }, { "epoch": 1.93, "grad_norm": 0.5995690993641924, "learning_rate": 3.625027265504011e-08, "loss": 0.4577, "step": 15114 }, { "epoch": 1.93, "grad_norm": 0.6166949403981787, "learning_rate": 3.612638375939337e-08, "loss": 0.4781, "step": 15115 }, { "epoch": 1.93, "grad_norm": 0.817337161987696, "learning_rate": 3.6002706159984354e-08, "loss": 0.5128, "step": 15116 }, { "epoch": 1.93, "grad_norm": 1.0409312096195078, "learning_rate": 3.58792398620772e-08, "loss": 0.5602, "step": 15117 }, { "epoch": 1.93, "grad_norm": 0.8017006842416546, "learning_rate": 3.575598487092824e-08, "loss": 0.4814, "step": 15118 }, { "epoch": 1.93, "grad_norm": 0.60218003380905, "learning_rate": 3.563294119178329e-08, "loss": 0.4458, "step": 15119 }, { "epoch": 1.93, "grad_norm": 0.6091587335760976, "learning_rate": 3.551010882987982e-08, "loss": 0.4561, "step": 15120 }, { "epoch": 1.93, "grad_norm": 0.657525484411186, "learning_rate": 3.538748779044698e-08, "loss": 0.5461, "step": 15121 }, { "epoch": 1.93, "grad_norm": 0.5678543638681082, "learning_rate": 3.526507807870394e-08, "loss": 0.4615, "step": 15122 }, { "epoch": 1.93, "grad_norm": 0.6885401959328727, "learning_rate": 3.5142879699862075e-08, "loss": 0.4859, "step": 15123 }, { "epoch": 1.93, "grad_norm": 0.685624351295359, "learning_rate": 3.5020892659122227e-08, "loss": 0.496, "step": 15124 }, { "epoch": 1.93, "grad_norm": 0.6104174385104225, "learning_rate": 3.4899116961676915e-08, "loss": 0.4632, "step": 15125 }, { "epoch": 1.93, "grad_norm": 0.6740429520748329, "learning_rate": 3.477755261271032e-08, "loss": 0.412, "step": 15126 }, { "epoch": 1.93, "grad_norm": 0.6031448243111859, "learning_rate": 3.4656199617396634e-08, "loss": 0.5119, "step": 15127 }, { "epoch": 1.93, "grad_norm": 0.686044387291408, "learning_rate": 3.453505798090118e-08, "loss": 0.5067, "step": 15128 }, { "epoch": 1.93, "grad_norm": 0.6022159604281908, "learning_rate": 3.4414127708381486e-08, "loss": 0.4222, "step": 15129 }, { "epoch": 1.93, "grad_norm": 0.6436773362958829, "learning_rate": 3.429340880498455e-08, "loss": 0.4697, "step": 15130 }, { "epoch": 1.93, "grad_norm": 0.7444085587839969, "learning_rate": 3.417290127584905e-08, "loss": 0.4652, "step": 15131 }, { "epoch": 1.93, "grad_norm": 0.6533086088540342, "learning_rate": 3.40526051261042e-08, "loss": 0.4419, "step": 15132 }, { "epoch": 1.93, "grad_norm": 0.631092543540407, "learning_rate": 3.393252036087147e-08, "loss": 0.4554, "step": 15133 }, { "epoch": 1.93, "grad_norm": 0.8004894050648322, "learning_rate": 3.381264698526232e-08, "loss": 0.4693, "step": 15134 }, { "epoch": 1.93, "grad_norm": 0.6201017885017608, "learning_rate": 3.369298500437823e-08, "loss": 0.4178, "step": 15135 }, { "epoch": 1.93, "grad_norm": 0.6692435634857152, "learning_rate": 3.3573534423314566e-08, "loss": 0.489, "step": 15136 }, { "epoch": 1.93, "grad_norm": 0.7009792558448507, "learning_rate": 3.345429524715504e-08, "loss": 0.4755, "step": 15137 }, { "epoch": 1.93, "grad_norm": 0.6077973637217287, "learning_rate": 3.333526748097504e-08, "loss": 0.4553, "step": 15138 }, { "epoch": 1.93, "grad_norm": 0.8064126954227931, "learning_rate": 3.321645112984162e-08, "loss": 0.56, "step": 15139 }, { "epoch": 1.93, "grad_norm": 0.6865226379354565, "learning_rate": 3.30978461988124e-08, "loss": 0.5202, "step": 15140 }, { "epoch": 1.93, "grad_norm": 0.6912271254583112, "learning_rate": 3.2979452692935564e-08, "loss": 0.4998, "step": 15141 }, { "epoch": 1.93, "grad_norm": 0.7785870317201594, "learning_rate": 3.286127061725153e-08, "loss": 0.5802, "step": 15142 }, { "epoch": 1.93, "grad_norm": 0.7239954111496669, "learning_rate": 3.274329997679015e-08, "loss": 0.5253, "step": 15143 }, { "epoch": 1.93, "grad_norm": 0.7068506598867735, "learning_rate": 3.262554077657354e-08, "loss": 0.5573, "step": 15144 }, { "epoch": 1.93, "grad_norm": 0.6814547461449372, "learning_rate": 3.250799302161434e-08, "loss": 0.5075, "step": 15145 }, { "epoch": 1.93, "grad_norm": 0.6762965880286381, "learning_rate": 3.239065671691577e-08, "loss": 0.5323, "step": 15146 }, { "epoch": 1.93, "grad_norm": 0.6344828799313355, "learning_rate": 3.227353186747273e-08, "loss": 0.5104, "step": 15147 }, { "epoch": 1.93, "grad_norm": 0.8000468444701336, "learning_rate": 3.215661847827123e-08, "loss": 0.5283, "step": 15148 }, { "epoch": 1.93, "grad_norm": 0.6793220076268363, "learning_rate": 3.2039916554287284e-08, "loss": 0.5133, "step": 15149 }, { "epoch": 1.93, "grad_norm": 0.7721999341275275, "learning_rate": 3.1923426100489154e-08, "loss": 0.5219, "step": 15150 }, { "epoch": 1.93, "grad_norm": 0.6791765080030236, "learning_rate": 3.1807147121834525e-08, "loss": 0.4618, "step": 15151 }, { "epoch": 1.93, "grad_norm": 0.8227888483707223, "learning_rate": 3.169107962327389e-08, "loss": 0.5243, "step": 15152 }, { "epoch": 1.93, "grad_norm": 0.6899356256320979, "learning_rate": 3.157522360974719e-08, "loss": 0.491, "step": 15153 }, { "epoch": 1.93, "grad_norm": 0.7250527840865344, "learning_rate": 3.1459579086186575e-08, "loss": 0.4296, "step": 15154 }, { "epoch": 1.93, "grad_norm": 0.5988313813691181, "learning_rate": 3.134414605751479e-08, "loss": 0.4399, "step": 15155 }, { "epoch": 1.93, "grad_norm": 0.6998010076395835, "learning_rate": 3.1228924528645124e-08, "loss": 0.5285, "step": 15156 }, { "epoch": 1.93, "grad_norm": 0.8146918347420816, "learning_rate": 3.111391450448198e-08, "loss": 0.5633, "step": 15157 }, { "epoch": 1.93, "grad_norm": 0.9153852335974512, "learning_rate": 3.099911598992145e-08, "loss": 0.5231, "step": 15158 }, { "epoch": 1.93, "grad_norm": 0.754671750318938, "learning_rate": 3.088452898984906e-08, "loss": 0.5091, "step": 15159 }, { "epoch": 1.93, "grad_norm": 0.6006864457034206, "learning_rate": 3.0770153509144255e-08, "loss": 0.4408, "step": 15160 }, { "epoch": 1.93, "grad_norm": 0.6433534629537412, "learning_rate": 3.0655989552674234e-08, "loss": 0.49, "step": 15161 }, { "epoch": 1.93, "grad_norm": 0.7854670035750826, "learning_rate": 3.0542037125299015e-08, "loss": 0.5641, "step": 15162 }, { "epoch": 1.93, "grad_norm": 0.7731244973043242, "learning_rate": 3.0428296231869156e-08, "loss": 0.5294, "step": 15163 }, { "epoch": 1.93, "grad_norm": 0.7779043017155192, "learning_rate": 3.0314766877226345e-08, "loss": 0.4522, "step": 15164 }, { "epoch": 1.93, "grad_norm": 0.5689427252586934, "learning_rate": 3.020144906620281e-08, "loss": 0.4019, "step": 15165 }, { "epoch": 1.93, "grad_norm": 0.7379909431822576, "learning_rate": 3.0088342803622496e-08, "loss": 0.4521, "step": 15166 }, { "epoch": 1.93, "grad_norm": 0.6580081749131689, "learning_rate": 2.9975448094299866e-08, "loss": 0.4524, "step": 15167 }, { "epoch": 1.93, "grad_norm": 0.6154317371448199, "learning_rate": 2.9862764943040524e-08, "loss": 0.5266, "step": 15168 }, { "epoch": 1.93, "grad_norm": 0.8909765151191803, "learning_rate": 2.975029335464119e-08, "loss": 0.524, "step": 15169 }, { "epoch": 1.93, "grad_norm": 0.6228535873031579, "learning_rate": 2.963803333388915e-08, "loss": 0.4481, "step": 15170 }, { "epoch": 1.93, "grad_norm": 0.5727286696956554, "learning_rate": 2.9525984885562796e-08, "loss": 0.4249, "step": 15171 }, { "epoch": 1.93, "grad_norm": 0.5888982615923285, "learning_rate": 2.9414148014431653e-08, "loss": 0.4176, "step": 15172 }, { "epoch": 1.93, "grad_norm": 0.5919559214086101, "learning_rate": 2.930252272525691e-08, "loss": 0.4858, "step": 15173 }, { "epoch": 1.93, "grad_norm": 0.7849524135584427, "learning_rate": 2.9191109022789764e-08, "loss": 0.516, "step": 15174 }, { "epoch": 1.93, "grad_norm": 0.575829848285687, "learning_rate": 2.907990691177254e-08, "loss": 0.4066, "step": 15175 }, { "epoch": 1.93, "grad_norm": 0.6985896956148122, "learning_rate": 2.8968916396939216e-08, "loss": 0.4862, "step": 15176 }, { "epoch": 1.93, "grad_norm": 0.6581841937590546, "learning_rate": 2.885813748301325e-08, "loss": 0.4417, "step": 15177 }, { "epoch": 1.93, "grad_norm": 0.6479669282352425, "learning_rate": 2.8747570174711414e-08, "loss": 0.4893, "step": 15178 }, { "epoch": 1.93, "grad_norm": 0.9254527545830931, "learning_rate": 2.8637214476739394e-08, "loss": 0.5563, "step": 15179 }, { "epoch": 1.93, "grad_norm": 0.7815461895941868, "learning_rate": 2.8527070393795653e-08, "loss": 0.5393, "step": 15180 }, { "epoch": 1.93, "grad_norm": 0.7347016781758972, "learning_rate": 2.8417137930566995e-08, "loss": 0.4932, "step": 15181 }, { "epoch": 1.93, "grad_norm": 0.6159591427285297, "learning_rate": 2.8307417091734678e-08, "loss": 0.4553, "step": 15182 }, { "epoch": 1.93, "grad_norm": 0.5746674932683945, "learning_rate": 2.8197907881967745e-08, "loss": 0.4552, "step": 15183 }, { "epoch": 1.93, "grad_norm": 0.6371529456176857, "learning_rate": 2.8088610305928576e-08, "loss": 0.4573, "step": 15184 }, { "epoch": 1.93, "grad_norm": 0.7046731360710549, "learning_rate": 2.7979524368269008e-08, "loss": 0.4514, "step": 15185 }, { "epoch": 1.93, "grad_norm": 0.6304270102663132, "learning_rate": 2.78706500736331e-08, "loss": 0.4386, "step": 15186 }, { "epoch": 1.93, "grad_norm": 0.6129060282197543, "learning_rate": 2.7761987426654922e-08, "loss": 0.4278, "step": 15187 }, { "epoch": 1.93, "grad_norm": 0.5824216887245995, "learning_rate": 2.7653536431960226e-08, "loss": 0.411, "step": 15188 }, { "epoch": 1.94, "grad_norm": 0.6889716476035775, "learning_rate": 2.7545297094164204e-08, "loss": 0.4305, "step": 15189 }, { "epoch": 1.94, "grad_norm": 0.6190228610216427, "learning_rate": 2.7437269417875945e-08, "loss": 0.489, "step": 15190 }, { "epoch": 1.94, "grad_norm": 0.7402123949847493, "learning_rate": 2.7329453407692884e-08, "loss": 0.4877, "step": 15191 }, { "epoch": 1.94, "grad_norm": 0.7284389662175788, "learning_rate": 2.7221849068205242e-08, "loss": 0.4636, "step": 15192 }, { "epoch": 1.94, "grad_norm": 0.5719630955623257, "learning_rate": 2.7114456403991574e-08, "loss": 0.4737, "step": 15193 }, { "epoch": 1.94, "grad_norm": 0.8267101575424242, "learning_rate": 2.7007275419625446e-08, "loss": 0.4947, "step": 15194 }, { "epoch": 1.94, "grad_norm": 0.666768710671184, "learning_rate": 2.690030611966765e-08, "loss": 0.4412, "step": 15195 }, { "epoch": 1.94, "grad_norm": 0.6343363020705602, "learning_rate": 2.679354850867233e-08, "loss": 0.4389, "step": 15196 }, { "epoch": 1.94, "grad_norm": 0.7451455893921176, "learning_rate": 2.6687002591183618e-08, "loss": 0.5744, "step": 15197 }, { "epoch": 1.94, "grad_norm": 0.6802548431049624, "learning_rate": 2.658066837173623e-08, "loss": 0.4956, "step": 15198 }, { "epoch": 1.94, "grad_norm": 0.65304279383065, "learning_rate": 2.6474545854857647e-08, "loss": 0.5055, "step": 15199 }, { "epoch": 1.94, "grad_norm": 0.6577064120412172, "learning_rate": 2.6368635045064263e-08, "loss": 0.4756, "step": 15200 }, { "epoch": 1.94, "grad_norm": 0.5569512112963161, "learning_rate": 2.6262935946864688e-08, "loss": 0.4464, "step": 15201 }, { "epoch": 1.94, "grad_norm": 0.6537965984334853, "learning_rate": 2.6157448564758104e-08, "loss": 0.4887, "step": 15202 }, { "epoch": 1.94, "grad_norm": 0.8616581121633766, "learning_rate": 2.6052172903234807e-08, "loss": 0.5468, "step": 15203 }, { "epoch": 1.94, "grad_norm": 0.7563367429959754, "learning_rate": 2.5947108966775658e-08, "loss": 0.5189, "step": 15204 }, { "epoch": 1.94, "grad_norm": 0.5996483208752759, "learning_rate": 2.5842256759853745e-08, "loss": 0.4327, "step": 15205 }, { "epoch": 1.94, "grad_norm": 0.6900547849752002, "learning_rate": 2.573761628693161e-08, "loss": 0.4925, "step": 15206 }, { "epoch": 1.94, "grad_norm": 0.6759890180407595, "learning_rate": 2.5633187552464024e-08, "loss": 0.4378, "step": 15207 }, { "epoch": 1.94, "grad_norm": 0.625177616978941, "learning_rate": 2.552897056089576e-08, "loss": 0.4375, "step": 15208 }, { "epoch": 1.94, "grad_norm": 0.712631726798787, "learning_rate": 2.542496531666272e-08, "loss": 0.5022, "step": 15209 }, { "epoch": 1.94, "grad_norm": 0.7223553518385404, "learning_rate": 2.532117182419247e-08, "loss": 0.4722, "step": 15210 }, { "epoch": 1.94, "grad_norm": 0.6446765199211136, "learning_rate": 2.52175900879037e-08, "loss": 0.4238, "step": 15211 }, { "epoch": 1.94, "grad_norm": 0.630331984884774, "learning_rate": 2.5114220112204545e-08, "loss": 0.4148, "step": 15212 }, { "epoch": 1.94, "grad_norm": 0.6137194971298634, "learning_rate": 2.501106190149538e-08, "loss": 0.5171, "step": 15213 }, { "epoch": 1.94, "grad_norm": 0.683897235530842, "learning_rate": 2.4908115460167693e-08, "loss": 0.4758, "step": 15214 }, { "epoch": 1.94, "grad_norm": 0.672326116122611, "learning_rate": 2.4805380792604084e-08, "loss": 0.4947, "step": 15215 }, { "epoch": 1.94, "grad_norm": 0.8064845537953526, "learning_rate": 2.4702857903176058e-08, "loss": 0.6367, "step": 15216 }, { "epoch": 1.94, "grad_norm": 1.5570005496147856, "learning_rate": 2.4600546796249015e-08, "loss": 0.5737, "step": 15217 }, { "epoch": 1.94, "grad_norm": 0.7116614593276176, "learning_rate": 2.4498447476177245e-08, "loss": 0.5028, "step": 15218 }, { "epoch": 1.94, "grad_norm": 0.618554322087599, "learning_rate": 2.439655994730783e-08, "loss": 0.4495, "step": 15219 }, { "epoch": 1.94, "grad_norm": 0.6099869779923377, "learning_rate": 2.429488421397619e-08, "loss": 0.4877, "step": 15220 }, { "epoch": 1.94, "grad_norm": 0.7758041532385076, "learning_rate": 2.4193420280511638e-08, "loss": 0.5107, "step": 15221 }, { "epoch": 1.94, "grad_norm": 0.6254012159346839, "learning_rate": 2.409216815123294e-08, "loss": 0.5245, "step": 15222 }, { "epoch": 1.94, "grad_norm": 0.8328277544441479, "learning_rate": 2.3991127830449988e-08, "loss": 0.5213, "step": 15223 }, { "epoch": 1.94, "grad_norm": 0.6347028269082315, "learning_rate": 2.3890299322463782e-08, "loss": 0.495, "step": 15224 }, { "epoch": 1.94, "grad_norm": 0.7184898855252131, "learning_rate": 2.3789682631565892e-08, "loss": 0.4418, "step": 15225 }, { "epoch": 1.94, "grad_norm": 0.6758837951426744, "learning_rate": 2.3689277762039554e-08, "loss": 0.4899, "step": 15226 }, { "epoch": 1.94, "grad_norm": 1.2132250579635022, "learning_rate": 2.3589084718158572e-08, "loss": 0.4901, "step": 15227 }, { "epoch": 1.94, "grad_norm": 0.7517175139535525, "learning_rate": 2.348910350418787e-08, "loss": 0.464, "step": 15228 }, { "epoch": 1.94, "grad_norm": 3.8030407714099104, "learning_rate": 2.338933412438349e-08, "loss": 0.5099, "step": 15229 }, { "epoch": 1.94, "grad_norm": 0.8018773308511897, "learning_rate": 2.328977658299203e-08, "loss": 0.466, "step": 15230 }, { "epoch": 1.94, "grad_norm": 0.6630419908848498, "learning_rate": 2.319043088425177e-08, "loss": 0.5088, "step": 15231 }, { "epoch": 1.94, "grad_norm": 0.7900888185458365, "learning_rate": 2.3091297032391546e-08, "loss": 0.5282, "step": 15232 }, { "epoch": 1.94, "grad_norm": 0.7126695694820613, "learning_rate": 2.299237503163021e-08, "loss": 0.5144, "step": 15233 }, { "epoch": 1.94, "grad_norm": 0.6919151367835308, "learning_rate": 2.289366488617939e-08, "loss": 0.4818, "step": 15234 }, { "epoch": 1.94, "grad_norm": 0.8602035496163812, "learning_rate": 2.2795166600240726e-08, "loss": 0.5126, "step": 15235 }, { "epoch": 1.94, "grad_norm": 0.6636086464205939, "learning_rate": 2.2696880178006974e-08, "loss": 0.4927, "step": 15236 }, { "epoch": 1.94, "grad_norm": 0.8808540719208796, "learning_rate": 2.259880562366201e-08, "loss": 0.5207, "step": 15237 }, { "epoch": 1.94, "grad_norm": 0.6949340649903301, "learning_rate": 2.2500942941380277e-08, "loss": 0.5074, "step": 15238 }, { "epoch": 1.94, "grad_norm": 0.6770042657100963, "learning_rate": 2.2403292135327882e-08, "loss": 0.4878, "step": 15239 }, { "epoch": 1.94, "grad_norm": 0.6388400099250967, "learning_rate": 2.2305853209660944e-08, "loss": 0.4624, "step": 15240 }, { "epoch": 1.94, "grad_norm": 0.6441928729694157, "learning_rate": 2.2208626168527257e-08, "loss": 0.5111, "step": 15241 }, { "epoch": 1.94, "grad_norm": 0.7268764127998242, "learning_rate": 2.2111611016065738e-08, "loss": 0.5357, "step": 15242 }, { "epoch": 1.94, "grad_norm": 0.9096503544826413, "learning_rate": 2.2014807756405855e-08, "loss": 0.4591, "step": 15243 }, { "epoch": 1.94, "grad_norm": 0.6531758393056352, "learning_rate": 2.19182163936682e-08, "loss": 0.4685, "step": 15244 }, { "epoch": 1.94, "grad_norm": 0.7036748991295088, "learning_rate": 2.182183693196449e-08, "loss": 0.4793, "step": 15245 }, { "epoch": 1.94, "grad_norm": 0.7771571774891451, "learning_rate": 2.1725669375396998e-08, "loss": 0.4718, "step": 15246 }, { "epoch": 1.94, "grad_norm": 0.6331745180864147, "learning_rate": 2.1629713728059666e-08, "loss": 0.4606, "step": 15247 }, { "epoch": 1.94, "grad_norm": 0.838590672200832, "learning_rate": 2.1533969994037007e-08, "loss": 0.6019, "step": 15248 }, { "epoch": 1.94, "grad_norm": 0.8229855135501254, "learning_rate": 2.1438438177404096e-08, "loss": 0.5364, "step": 15249 }, { "epoch": 1.94, "grad_norm": 0.840939777325044, "learning_rate": 2.134311828222768e-08, "loss": 0.5629, "step": 15250 }, { "epoch": 1.94, "grad_norm": 0.6730559522128766, "learning_rate": 2.124801031256507e-08, "loss": 0.4544, "step": 15251 }, { "epoch": 1.94, "grad_norm": 0.6144974555358002, "learning_rate": 2.1153114272465248e-08, "loss": 0.4417, "step": 15252 }, { "epoch": 1.94, "grad_norm": 0.6467599414434273, "learning_rate": 2.1058430165967203e-08, "loss": 0.4905, "step": 15253 }, { "epoch": 1.94, "grad_norm": 1.0712406597832085, "learning_rate": 2.0963957997101048e-08, "loss": 0.4852, "step": 15254 }, { "epoch": 1.94, "grad_norm": 0.6297197890448283, "learning_rate": 2.0869697769888565e-08, "loss": 0.4757, "step": 15255 }, { "epoch": 1.94, "grad_norm": 0.6073561903795146, "learning_rate": 2.0775649488342096e-08, "loss": 0.405, "step": 15256 }, { "epoch": 1.94, "grad_norm": 0.6278974794901263, "learning_rate": 2.0681813156465113e-08, "loss": 0.4665, "step": 15257 }, { "epoch": 1.94, "grad_norm": 0.8255880736195185, "learning_rate": 2.0588188778251085e-08, "loss": 0.4992, "step": 15258 }, { "epoch": 1.94, "grad_norm": 0.676491634360038, "learning_rate": 2.0494776357686265e-08, "loss": 0.4745, "step": 15259 }, { "epoch": 1.94, "grad_norm": 0.7022896985870205, "learning_rate": 2.0401575898746916e-08, "loss": 0.4791, "step": 15260 }, { "epoch": 1.94, "grad_norm": 0.9428337945152696, "learning_rate": 2.0308587405399316e-08, "loss": 0.4954, "step": 15261 }, { "epoch": 1.94, "grad_norm": 0.6336312463277565, "learning_rate": 2.021581088160307e-08, "loss": 0.474, "step": 15262 }, { "epoch": 1.94, "grad_norm": 0.7379603827815319, "learning_rate": 2.0123246331306134e-08, "loss": 0.5335, "step": 15263 }, { "epoch": 1.94, "grad_norm": 0.6417366737970948, "learning_rate": 2.0030893758449244e-08, "loss": 0.4786, "step": 15264 }, { "epoch": 1.94, "grad_norm": 0.6719104239523251, "learning_rate": 1.9938753166963698e-08, "loss": 0.5212, "step": 15265 }, { "epoch": 1.94, "grad_norm": 0.7189385763117473, "learning_rate": 1.984682456077136e-08, "loss": 0.5515, "step": 15266 }, { "epoch": 1.94, "grad_norm": 0.7019790830345607, "learning_rate": 1.975510794378521e-08, "loss": 0.4935, "step": 15267 }, { "epoch": 1.95, "grad_norm": 0.7206571453478153, "learning_rate": 1.9663603319909907e-08, "loss": 0.496, "step": 15268 }, { "epoch": 1.95, "grad_norm": 0.630521697484535, "learning_rate": 1.957231069304011e-08, "loss": 0.4525, "step": 15269 }, { "epoch": 1.95, "grad_norm": 0.8215063428670665, "learning_rate": 1.9481230067061595e-08, "loss": 0.4807, "step": 15270 }, { "epoch": 1.95, "grad_norm": 0.5494633156702019, "learning_rate": 1.939036144585127e-08, "loss": 0.4581, "step": 15271 }, { "epoch": 1.95, "grad_norm": 0.5721474768314673, "learning_rate": 1.9299704833278253e-08, "loss": 0.4417, "step": 15272 }, { "epoch": 1.95, "grad_norm": 0.7032865055770813, "learning_rate": 1.920926023320058e-08, "loss": 0.5008, "step": 15273 }, { "epoch": 1.95, "grad_norm": 0.607676618960558, "learning_rate": 1.911902764946849e-08, "loss": 0.4978, "step": 15274 }, { "epoch": 1.95, "grad_norm": 0.9203291049070627, "learning_rate": 1.9029007085922258e-08, "loss": 0.5208, "step": 15275 }, { "epoch": 1.95, "grad_norm": 0.6728718897494084, "learning_rate": 1.893919854639492e-08, "loss": 0.5126, "step": 15276 }, { "epoch": 1.95, "grad_norm": 0.6884069951671887, "learning_rate": 1.884960203470787e-08, "loss": 0.5192, "step": 15277 }, { "epoch": 1.95, "grad_norm": 0.7250198684772472, "learning_rate": 1.8760217554676386e-08, "loss": 0.5199, "step": 15278 }, { "epoch": 1.95, "grad_norm": 0.6558883622624511, "learning_rate": 1.8671045110104645e-08, "loss": 0.4328, "step": 15279 }, { "epoch": 1.95, "grad_norm": 0.6036197839334017, "learning_rate": 1.8582084704788505e-08, "loss": 0.4655, "step": 15280 }, { "epoch": 1.95, "grad_norm": 0.8022229687007842, "learning_rate": 1.849333634251438e-08, "loss": 0.53, "step": 15281 }, { "epoch": 1.95, "grad_norm": 0.7307003826427637, "learning_rate": 1.840480002706091e-08, "loss": 0.4854, "step": 15282 }, { "epoch": 1.95, "grad_norm": 0.8591947746128857, "learning_rate": 1.831647576219564e-08, "loss": 0.4662, "step": 15283 }, { "epoch": 1.95, "grad_norm": 0.6729225543465089, "learning_rate": 1.8228363551678897e-08, "loss": 0.4568, "step": 15284 }, { "epoch": 1.95, "grad_norm": 0.7381538037498684, "learning_rate": 1.814046339926101e-08, "loss": 0.4982, "step": 15285 }, { "epoch": 1.95, "grad_norm": 0.6308476435558426, "learning_rate": 1.8052775308684545e-08, "loss": 0.4193, "step": 15286 }, { "epoch": 1.95, "grad_norm": 0.7139992583931533, "learning_rate": 1.796529928368096e-08, "loss": 0.4799, "step": 15287 }, { "epoch": 1.95, "grad_norm": 0.7268150561175787, "learning_rate": 1.7878035327974497e-08, "loss": 0.5195, "step": 15288 }, { "epoch": 1.95, "grad_norm": 0.5852799538375456, "learning_rate": 1.779098344527941e-08, "loss": 0.4794, "step": 15289 }, { "epoch": 1.95, "grad_norm": 0.7046481940167714, "learning_rate": 1.7704143639301062e-08, "loss": 0.4539, "step": 15290 }, { "epoch": 1.95, "grad_norm": 0.6755830292288256, "learning_rate": 1.7617515913736504e-08, "loss": 0.4497, "step": 15291 }, { "epoch": 1.95, "grad_norm": 0.6475062662096357, "learning_rate": 1.7531100272272784e-08, "loss": 0.4599, "step": 15292 }, { "epoch": 1.95, "grad_norm": 0.9423503130580584, "learning_rate": 1.7444896718588065e-08, "loss": 0.4923, "step": 15293 }, { "epoch": 1.95, "grad_norm": 0.8317170445250361, "learning_rate": 1.7358905256352754e-08, "loss": 0.5424, "step": 15294 }, { "epoch": 1.95, "grad_norm": 0.6992754460936552, "learning_rate": 1.727312588922614e-08, "loss": 0.5047, "step": 15295 }, { "epoch": 1.95, "grad_norm": 0.6405389485493949, "learning_rate": 1.71875586208603e-08, "loss": 0.4435, "step": 15296 }, { "epoch": 1.95, "grad_norm": 0.5611011068040442, "learning_rate": 1.710220345489677e-08, "loss": 0.4852, "step": 15297 }, { "epoch": 1.95, "grad_norm": 0.6827641729468165, "learning_rate": 1.7017060394969864e-08, "loss": 0.4647, "step": 15298 }, { "epoch": 1.95, "grad_norm": 0.8219551341421978, "learning_rate": 1.693212944470335e-08, "loss": 0.452, "step": 15299 }, { "epoch": 1.95, "grad_norm": 1.0012065631250422, "learning_rate": 1.6847410607712666e-08, "loss": 0.5671, "step": 15300 }, { "epoch": 1.95, "grad_norm": 0.7387130814905374, "learning_rate": 1.6762903887603266e-08, "loss": 0.5613, "step": 15301 }, { "epoch": 1.95, "grad_norm": 0.935170784194422, "learning_rate": 1.667860928797338e-08, "loss": 0.4996, "step": 15302 }, { "epoch": 1.95, "grad_norm": 0.689422251991122, "learning_rate": 1.659452681241014e-08, "loss": 0.4792, "step": 15303 }, { "epoch": 1.95, "grad_norm": 0.8696859383885638, "learning_rate": 1.6510656464494013e-08, "loss": 0.5264, "step": 15304 }, { "epoch": 1.95, "grad_norm": 0.8728075530070583, "learning_rate": 1.6426998247793812e-08, "loss": 0.5747, "step": 15305 }, { "epoch": 1.95, "grad_norm": 0.5964939644230437, "learning_rate": 1.6343552165871134e-08, "loss": 0.5361, "step": 15306 }, { "epoch": 1.95, "grad_norm": 0.8083147190104392, "learning_rate": 1.6260318222277582e-08, "loss": 0.5589, "step": 15307 }, { "epoch": 1.95, "grad_norm": 0.6673563712350532, "learning_rate": 1.617729642055699e-08, "loss": 0.4583, "step": 15308 }, { "epoch": 1.95, "grad_norm": 0.7159851516914955, "learning_rate": 1.6094486764242635e-08, "loss": 0.461, "step": 15309 }, { "epoch": 1.95, "grad_norm": 0.6302132692521711, "learning_rate": 1.6011889256860037e-08, "loss": 0.4564, "step": 15310 }, { "epoch": 1.95, "grad_norm": 0.7789845046980934, "learning_rate": 1.5929503901924714e-08, "loss": 0.5663, "step": 15311 }, { "epoch": 1.95, "grad_norm": 0.8276699184024041, "learning_rate": 1.5847330702943308e-08, "loss": 0.558, "step": 15312 }, { "epoch": 1.95, "grad_norm": 0.7743876366340604, "learning_rate": 1.5765369663414132e-08, "loss": 0.5143, "step": 15313 }, { "epoch": 1.95, "grad_norm": 0.6375782894907097, "learning_rate": 1.5683620786825503e-08, "loss": 0.476, "step": 15314 }, { "epoch": 1.95, "grad_norm": 0.8455457022094818, "learning_rate": 1.5602084076657974e-08, "loss": 0.5069, "step": 15315 }, { "epoch": 1.95, "grad_norm": 0.7320879546703841, "learning_rate": 1.55207595363821e-08, "loss": 0.5342, "step": 15316 }, { "epoch": 1.95, "grad_norm": 0.6684737198358068, "learning_rate": 1.5439647169458448e-08, "loss": 0.4776, "step": 15317 }, { "epoch": 1.95, "grad_norm": 0.6526058070075972, "learning_rate": 1.5358746979341476e-08, "loss": 0.4788, "step": 15318 }, { "epoch": 1.95, "grad_norm": 0.681620523598696, "learning_rate": 1.527805896947343e-08, "loss": 0.4914, "step": 15319 }, { "epoch": 1.95, "grad_norm": 0.7416819886647016, "learning_rate": 1.5197583143289894e-08, "loss": 0.531, "step": 15320 }, { "epoch": 1.95, "grad_norm": 0.6941317373820866, "learning_rate": 1.5117319504216467e-08, "loss": 0.4968, "step": 15321 }, { "epoch": 1.95, "grad_norm": 0.6142902179683555, "learning_rate": 1.5037268055668743e-08, "loss": 0.4416, "step": 15322 }, { "epoch": 1.95, "grad_norm": 0.5518686207359406, "learning_rate": 1.4957428801055106e-08, "loss": 0.4222, "step": 15323 }, { "epoch": 1.95, "grad_norm": 0.7366287649100153, "learning_rate": 1.4877801743773401e-08, "loss": 0.4765, "step": 15324 }, { "epoch": 1.95, "grad_norm": 0.5814649900446742, "learning_rate": 1.4798386887213689e-08, "loss": 0.4362, "step": 15325 }, { "epoch": 1.95, "grad_norm": 0.6366668416900101, "learning_rate": 1.4719184234756601e-08, "loss": 0.4599, "step": 15326 }, { "epoch": 1.95, "grad_norm": 0.6722971084453189, "learning_rate": 1.4640193789772772e-08, "loss": 0.4137, "step": 15327 }, { "epoch": 1.95, "grad_norm": 0.6914106726830133, "learning_rate": 1.456141555562507e-08, "loss": 0.4341, "step": 15328 }, { "epoch": 1.95, "grad_norm": 0.6934503474604018, "learning_rate": 1.4482849535666921e-08, "loss": 0.4584, "step": 15329 }, { "epoch": 1.95, "grad_norm": 1.0373977804284868, "learning_rate": 1.4404495733242318e-08, "loss": 0.4896, "step": 15330 }, { "epoch": 1.95, "grad_norm": 0.7923474458620506, "learning_rate": 1.4326354151686373e-08, "loss": 0.5541, "step": 15331 }, { "epoch": 1.95, "grad_norm": 0.736716104706477, "learning_rate": 1.4248424794325866e-08, "loss": 0.5212, "step": 15332 }, { "epoch": 1.95, "grad_norm": 0.8338164521862803, "learning_rate": 1.417070766447759e-08, "loss": 0.5367, "step": 15333 }, { "epoch": 1.95, "grad_norm": 0.8132862657521611, "learning_rate": 1.4093202765450008e-08, "loss": 0.5011, "step": 15334 }, { "epoch": 1.95, "grad_norm": 0.5888161754247748, "learning_rate": 1.4015910100542151e-08, "loss": 0.4985, "step": 15335 }, { "epoch": 1.95, "grad_norm": 0.8848298995269954, "learning_rate": 1.3938829673043608e-08, "loss": 0.4976, "step": 15336 }, { "epoch": 1.95, "grad_norm": 0.7286656442883906, "learning_rate": 1.3861961486236753e-08, "loss": 0.4896, "step": 15337 }, { "epoch": 1.95, "grad_norm": 0.7578121658828687, "learning_rate": 1.3785305543392302e-08, "loss": 0.559, "step": 15338 }, { "epoch": 1.95, "grad_norm": 0.7886590126506068, "learning_rate": 1.3708861847773758e-08, "loss": 0.4477, "step": 15339 }, { "epoch": 1.95, "grad_norm": 0.6218499796525958, "learning_rate": 1.3632630402634628e-08, "loss": 0.4519, "step": 15340 }, { "epoch": 1.95, "grad_norm": 0.6972417378941916, "learning_rate": 1.3556611211221205e-08, "loss": 0.4753, "step": 15341 }, { "epoch": 1.95, "grad_norm": 0.7303204371323909, "learning_rate": 1.3480804276767568e-08, "loss": 0.5195, "step": 15342 }, { "epoch": 1.95, "grad_norm": 0.6394913820948551, "learning_rate": 1.340520960250169e-08, "loss": 0.4355, "step": 15343 }, { "epoch": 1.95, "grad_norm": 0.6889033559606808, "learning_rate": 1.3329827191641554e-08, "loss": 0.5029, "step": 15344 }, { "epoch": 1.95, "grad_norm": 0.7718574307111272, "learning_rate": 1.3254657047395148e-08, "loss": 0.4802, "step": 15345 }, { "epoch": 1.96, "grad_norm": 1.0055140757577994, "learning_rate": 1.317969917296269e-08, "loss": 0.4087, "step": 15346 }, { "epoch": 1.96, "grad_norm": 0.6684948521388439, "learning_rate": 1.3104953571534962e-08, "loss": 0.4606, "step": 15347 }, { "epoch": 1.96, "grad_norm": 0.575629897323177, "learning_rate": 1.3030420246293307e-08, "loss": 0.4347, "step": 15348 }, { "epoch": 1.96, "grad_norm": 0.6483693963134761, "learning_rate": 1.2956099200410744e-08, "loss": 0.4668, "step": 15349 }, { "epoch": 1.96, "grad_norm": 0.802412213327078, "learning_rate": 1.288199043705085e-08, "loss": 0.5286, "step": 15350 }, { "epoch": 1.96, "grad_norm": 0.6211407858394298, "learning_rate": 1.2808093959368329e-08, "loss": 0.4656, "step": 15351 }, { "epoch": 1.96, "grad_norm": 0.834666229312351, "learning_rate": 1.2734409770507883e-08, "loss": 0.5527, "step": 15352 }, { "epoch": 1.96, "grad_norm": 0.8157392419328129, "learning_rate": 1.2660937873607004e-08, "loss": 0.5198, "step": 15353 }, { "epoch": 1.96, "grad_norm": 0.6710858887251512, "learning_rate": 1.258767827179208e-08, "loss": 0.5212, "step": 15354 }, { "epoch": 1.96, "grad_norm": 0.7584866816346882, "learning_rate": 1.251463096818284e-08, "loss": 0.5199, "step": 15355 }, { "epoch": 1.96, "grad_norm": 0.6252458041680651, "learning_rate": 1.2441795965887905e-08, "loss": 0.4967, "step": 15356 }, { "epoch": 1.96, "grad_norm": 0.7857273864194603, "learning_rate": 1.2369173268007573e-08, "loss": 0.4846, "step": 15357 }, { "epoch": 1.96, "grad_norm": 0.6521730481827944, "learning_rate": 1.2296762877633817e-08, "loss": 0.4721, "step": 15358 }, { "epoch": 1.96, "grad_norm": 0.7894272784747022, "learning_rate": 1.222456479784806e-08, "loss": 0.5257, "step": 15359 }, { "epoch": 1.96, "grad_norm": 0.6480261976109505, "learning_rate": 1.2152579031723954e-08, "loss": 0.476, "step": 15360 }, { "epoch": 1.96, "grad_norm": 0.8920759433292329, "learning_rate": 1.2080805582325716e-08, "loss": 0.5406, "step": 15361 }, { "epoch": 1.96, "grad_norm": 0.7032669095279237, "learning_rate": 1.2009244452708124e-08, "loss": 0.5027, "step": 15362 }, { "epoch": 1.96, "grad_norm": 0.6046918906777227, "learning_rate": 1.1937895645918184e-08, "loss": 0.4615, "step": 15363 }, { "epoch": 1.96, "grad_norm": 0.7561777401174997, "learning_rate": 1.1866759164992359e-08, "loss": 0.521, "step": 15364 }, { "epoch": 1.96, "grad_norm": 0.6444359026099157, "learning_rate": 1.1795835012958779e-08, "loss": 0.4449, "step": 15365 }, { "epoch": 1.96, "grad_norm": 0.6600681661903032, "learning_rate": 1.1725123192836696e-08, "loss": 0.4531, "step": 15366 }, { "epoch": 1.96, "grad_norm": 0.6889505842753189, "learning_rate": 1.165462370763537e-08, "loss": 0.4613, "step": 15367 }, { "epoch": 1.96, "grad_norm": 0.6069788178995846, "learning_rate": 1.158433656035629e-08, "loss": 0.4646, "step": 15368 }, { "epoch": 1.96, "grad_norm": 0.8373871214567915, "learning_rate": 1.1514261753991507e-08, "loss": 0.5686, "step": 15369 }, { "epoch": 1.96, "grad_norm": 0.7023300198718433, "learning_rate": 1.1444399291523633e-08, "loss": 0.4324, "step": 15370 }, { "epoch": 1.96, "grad_norm": 0.7240253359331733, "learning_rate": 1.1374749175926958e-08, "loss": 0.5115, "step": 15371 }, { "epoch": 1.96, "grad_norm": 0.7616361149206801, "learning_rate": 1.130531141016522e-08, "loss": 0.5245, "step": 15372 }, { "epoch": 1.96, "grad_norm": 0.7503471287124828, "learning_rate": 1.1236085997194946e-08, "loss": 0.5455, "step": 15373 }, { "epoch": 1.96, "grad_norm": 0.9518899327957615, "learning_rate": 1.1167072939962664e-08, "loss": 0.5881, "step": 15374 }, { "epoch": 1.96, "grad_norm": 0.6484233585251715, "learning_rate": 1.1098272241406582e-08, "loss": 0.4752, "step": 15375 }, { "epoch": 1.96, "grad_norm": 0.6543535485950833, "learning_rate": 1.1029683904454358e-08, "loss": 0.5074, "step": 15376 }, { "epoch": 1.96, "grad_norm": 0.7808210785891895, "learning_rate": 1.0961307932025878e-08, "loss": 0.5518, "step": 15377 }, { "epoch": 1.96, "grad_norm": 0.7855689823508912, "learning_rate": 1.0893144327032145e-08, "loss": 0.5335, "step": 15378 }, { "epoch": 1.96, "grad_norm": 0.709480625389903, "learning_rate": 1.0825193092374175e-08, "loss": 0.5071, "step": 15379 }, { "epoch": 1.96, "grad_norm": 0.6186994508311541, "learning_rate": 1.0757454230944652e-08, "loss": 0.4743, "step": 15380 }, { "epoch": 1.96, "grad_norm": 0.8036145491647295, "learning_rate": 1.0689927745626826e-08, "loss": 0.4901, "step": 15381 }, { "epoch": 1.96, "grad_norm": 0.5673742960491402, "learning_rate": 1.062261363929562e-08, "loss": 0.4275, "step": 15382 }, { "epoch": 1.96, "grad_norm": 0.5629649112249802, "learning_rate": 1.0555511914815409e-08, "loss": 0.4474, "step": 15383 }, { "epoch": 1.96, "grad_norm": 0.6752925944983087, "learning_rate": 1.048862257504335e-08, "loss": 0.4568, "step": 15384 }, { "epoch": 1.96, "grad_norm": 0.6354403511582347, "learning_rate": 1.0421945622826613e-08, "loss": 0.4128, "step": 15385 }, { "epoch": 1.96, "grad_norm": 0.8597126764028046, "learning_rate": 1.0355481061002926e-08, "loss": 0.5588, "step": 15386 }, { "epoch": 1.96, "grad_norm": 0.7445880733099759, "learning_rate": 1.0289228892402247e-08, "loss": 0.5092, "step": 15387 }, { "epoch": 1.96, "grad_norm": 0.80873677543833, "learning_rate": 1.022318911984399e-08, "loss": 0.5739, "step": 15388 }, { "epoch": 1.96, "grad_norm": 0.7066286447466447, "learning_rate": 1.0157361746139239e-08, "loss": 0.4843, "step": 15389 }, { "epoch": 1.96, "grad_norm": 0.7098383291113136, "learning_rate": 1.009174677409075e-08, "loss": 0.5008, "step": 15390 }, { "epoch": 1.96, "grad_norm": 0.6945387663898919, "learning_rate": 1.0026344206490735e-08, "loss": 0.4721, "step": 15391 }, { "epoch": 1.96, "grad_norm": 0.7210412784993058, "learning_rate": 9.961154046124188e-09, "loss": 0.5066, "step": 15392 }, { "epoch": 1.96, "grad_norm": 0.8698754532639096, "learning_rate": 9.896176295765004e-09, "loss": 0.5143, "step": 15393 }, { "epoch": 1.96, "grad_norm": 0.567892747868869, "learning_rate": 9.831410958179299e-09, "loss": 0.4359, "step": 15394 }, { "epoch": 1.96, "grad_norm": 0.6536852841837884, "learning_rate": 9.766858036124316e-09, "loss": 0.4578, "step": 15395 }, { "epoch": 1.96, "grad_norm": 0.7165125199203151, "learning_rate": 9.702517532347855e-09, "loss": 0.4697, "step": 15396 }, { "epoch": 1.96, "grad_norm": 0.6506756729204348, "learning_rate": 9.638389449588282e-09, "loss": 0.5101, "step": 15397 }, { "epoch": 1.96, "grad_norm": 0.6665809398001928, "learning_rate": 9.574473790576188e-09, "loss": 0.483, "step": 15398 }, { "epoch": 1.96, "grad_norm": 0.7980816132355706, "learning_rate": 9.510770558031069e-09, "loss": 0.4885, "step": 15399 }, { "epoch": 1.96, "grad_norm": 0.5973088084072024, "learning_rate": 9.447279754665195e-09, "loss": 0.4534, "step": 15400 }, { "epoch": 1.96, "grad_norm": 0.5997814979765954, "learning_rate": 9.384001383180853e-09, "loss": 0.467, "step": 15401 }, { "epoch": 1.96, "grad_norm": 0.6078889417149492, "learning_rate": 9.320935446271994e-09, "loss": 0.4451, "step": 15402 }, { "epoch": 1.96, "grad_norm": 0.7030184978523135, "learning_rate": 9.258081946623142e-09, "loss": 0.4957, "step": 15403 }, { "epoch": 1.96, "grad_norm": 0.7027397861098216, "learning_rate": 9.195440886909934e-09, "loss": 0.4821, "step": 15404 }, { "epoch": 1.96, "grad_norm": 0.6735819181232409, "learning_rate": 9.133012269798013e-09, "loss": 0.5077, "step": 15405 }, { "epoch": 1.96, "grad_norm": 0.7813130757645751, "learning_rate": 9.070796097945255e-09, "loss": 0.4901, "step": 15406 }, { "epoch": 1.96, "grad_norm": 0.696465723319402, "learning_rate": 9.008792373999543e-09, "loss": 0.502, "step": 15407 }, { "epoch": 1.96, "grad_norm": 0.9097889080451066, "learning_rate": 8.947001100601538e-09, "loss": 0.6298, "step": 15408 }, { "epoch": 1.96, "grad_norm": 0.8586276081913551, "learning_rate": 8.88542228038025e-09, "loss": 0.5717, "step": 15409 }, { "epoch": 1.96, "grad_norm": 0.67525253359303, "learning_rate": 8.82405591595692e-09, "loss": 0.4767, "step": 15410 }, { "epoch": 1.96, "grad_norm": 0.695730219971246, "learning_rate": 8.762902009943896e-09, "loss": 0.4943, "step": 15411 }, { "epoch": 1.96, "grad_norm": 0.6193284783696161, "learning_rate": 8.70196056494521e-09, "loss": 0.4385, "step": 15412 }, { "epoch": 1.96, "grad_norm": 0.6991850106325351, "learning_rate": 8.641231583553233e-09, "loss": 0.4682, "step": 15413 }, { "epoch": 1.96, "grad_norm": 0.7058518822892855, "learning_rate": 8.580715068354784e-09, "loss": 0.4813, "step": 15414 }, { "epoch": 1.96, "grad_norm": 0.7041943159168729, "learning_rate": 8.520411021924468e-09, "loss": 0.5217, "step": 15415 }, { "epoch": 1.96, "grad_norm": 0.6846317964642599, "learning_rate": 8.460319446829679e-09, "loss": 0.4817, "step": 15416 }, { "epoch": 1.96, "grad_norm": 0.6122948438159073, "learning_rate": 8.400440345628924e-09, "loss": 0.4604, "step": 15417 }, { "epoch": 1.96, "grad_norm": 0.7477841588263554, "learning_rate": 8.340773720870166e-09, "loss": 0.512, "step": 15418 }, { "epoch": 1.96, "grad_norm": 0.6407671858172576, "learning_rate": 8.281319575093594e-09, "loss": 0.4367, "step": 15419 }, { "epoch": 1.96, "grad_norm": 0.8353943385827931, "learning_rate": 8.222077910829962e-09, "loss": 0.4536, "step": 15420 }, { "epoch": 1.96, "grad_norm": 0.6126494933116331, "learning_rate": 8.163048730601142e-09, "loss": 0.486, "step": 15421 }, { "epoch": 1.96, "grad_norm": 0.7818349022620179, "learning_rate": 8.104232036919568e-09, "loss": 0.558, "step": 15422 }, { "epoch": 1.96, "grad_norm": 0.6121138576293279, "learning_rate": 8.04562783228935e-09, "loss": 0.5013, "step": 15423 }, { "epoch": 1.96, "grad_norm": 0.7584698734196719, "learning_rate": 7.987236119204599e-09, "loss": 0.4983, "step": 15424 }, { "epoch": 1.97, "grad_norm": 0.7518382447434007, "learning_rate": 7.92905690015111e-09, "loss": 0.4735, "step": 15425 }, { "epoch": 1.97, "grad_norm": 0.712757630298834, "learning_rate": 7.871090177605233e-09, "loss": 0.4868, "step": 15426 }, { "epoch": 1.97, "grad_norm": 0.6018627315021788, "learning_rate": 7.81333595403444e-09, "loss": 0.4419, "step": 15427 }, { "epoch": 1.97, "grad_norm": 0.6402812950157167, "learning_rate": 7.755794231897318e-09, "loss": 0.5078, "step": 15428 }, { "epoch": 1.97, "grad_norm": 0.7903713423874995, "learning_rate": 7.698465013642465e-09, "loss": 0.5192, "step": 15429 }, { "epoch": 1.97, "grad_norm": 0.7254721871692754, "learning_rate": 7.641348301711814e-09, "loss": 0.5371, "step": 15430 }, { "epoch": 1.97, "grad_norm": 0.7084393556800649, "learning_rate": 7.584444098535092e-09, "loss": 0.4544, "step": 15431 }, { "epoch": 1.97, "grad_norm": 0.7294294329092668, "learning_rate": 7.527752406534805e-09, "loss": 0.4824, "step": 15432 }, { "epoch": 1.97, "grad_norm": 0.7854881969775811, "learning_rate": 7.47127322812513e-09, "loss": 0.5019, "step": 15433 }, { "epoch": 1.97, "grad_norm": 0.786285276568237, "learning_rate": 7.4150065657091485e-09, "loss": 0.531, "step": 15434 }, { "epoch": 1.97, "grad_norm": 0.7501965686418881, "learning_rate": 7.358952421682164e-09, "loss": 0.5002, "step": 15435 }, { "epoch": 1.97, "grad_norm": 0.6250430499672488, "learning_rate": 7.303110798431157e-09, "loss": 0.4905, "step": 15436 }, { "epoch": 1.97, "grad_norm": 0.7922632747127092, "learning_rate": 7.2474816983314485e-09, "loss": 0.4478, "step": 15437 }, { "epoch": 1.97, "grad_norm": 0.7077119399120396, "learning_rate": 7.1920651237522564e-09, "loss": 0.4856, "step": 15438 }, { "epoch": 1.97, "grad_norm": 0.7847199706321449, "learning_rate": 7.136861077052249e-09, "loss": 0.4857, "step": 15439 }, { "epoch": 1.97, "grad_norm": 0.6216998985820167, "learning_rate": 7.081869560581212e-09, "loss": 0.4702, "step": 15440 }, { "epoch": 1.97, "grad_norm": 0.9126848764318422, "learning_rate": 7.027090576679496e-09, "loss": 0.5519, "step": 15441 }, { "epoch": 1.97, "grad_norm": 0.728493213276315, "learning_rate": 6.9725241276796804e-09, "loss": 0.489, "step": 15442 }, { "epoch": 1.97, "grad_norm": 0.6653442627642763, "learning_rate": 6.918170215903796e-09, "loss": 0.5079, "step": 15443 }, { "epoch": 1.97, "grad_norm": 1.14623216763423, "learning_rate": 6.864028843665549e-09, "loss": 0.5282, "step": 15444 }, { "epoch": 1.97, "grad_norm": 0.6633187844859918, "learning_rate": 6.810100013270315e-09, "loss": 0.4988, "step": 15445 }, { "epoch": 1.97, "grad_norm": 0.7771031492389495, "learning_rate": 6.756383727012927e-09, "loss": 0.4739, "step": 15446 }, { "epoch": 1.97, "grad_norm": 0.5635616258467558, "learning_rate": 6.702879987180444e-09, "loss": 0.4638, "step": 15447 }, { "epoch": 1.97, "grad_norm": 0.8067186112629603, "learning_rate": 6.649588796049933e-09, "loss": 0.5442, "step": 15448 }, { "epoch": 1.97, "grad_norm": 0.7654219195221258, "learning_rate": 6.596510155889579e-09, "loss": 0.5057, "step": 15449 }, { "epoch": 1.97, "grad_norm": 0.7229041951918309, "learning_rate": 6.543644068959798e-09, "loss": 0.5214, "step": 15450 }, { "epoch": 1.97, "grad_norm": 0.6668478604716366, "learning_rate": 6.490990537509901e-09, "loss": 0.4324, "step": 15451 }, { "epoch": 1.97, "grad_norm": 0.6078471112802191, "learning_rate": 6.438549563781426e-09, "loss": 0.4712, "step": 15452 }, { "epoch": 1.97, "grad_norm": 0.7366108885320185, "learning_rate": 6.386321150007035e-09, "loss": 0.4691, "step": 15453 }, { "epoch": 1.97, "grad_norm": 0.789460484514598, "learning_rate": 6.334305298409393e-09, "loss": 0.5414, "step": 15454 }, { "epoch": 1.97, "grad_norm": 1.5248325506754683, "learning_rate": 6.282502011203395e-09, "loss": 0.464, "step": 15455 }, { "epoch": 1.97, "grad_norm": 0.6122735820222236, "learning_rate": 6.2309112905933885e-09, "loss": 0.4544, "step": 15456 }, { "epoch": 1.97, "grad_norm": 0.5989020667470607, "learning_rate": 6.1795331387753955e-09, "loss": 0.4271, "step": 15457 }, { "epoch": 1.97, "grad_norm": 0.6346964513683307, "learning_rate": 6.128367557936554e-09, "loss": 0.4255, "step": 15458 }, { "epoch": 1.97, "grad_norm": 0.6479106284181875, "learning_rate": 6.0774145502556784e-09, "loss": 0.4804, "step": 15459 }, { "epoch": 1.97, "grad_norm": 0.7651171269092488, "learning_rate": 6.0266741179004774e-09, "loss": 0.5282, "step": 15460 }, { "epoch": 1.97, "grad_norm": 0.610236844229061, "learning_rate": 5.976146263031446e-09, "loss": 0.4213, "step": 15461 }, { "epoch": 1.97, "grad_norm": 0.6144612677220715, "learning_rate": 5.925830987799641e-09, "loss": 0.5038, "step": 15462 }, { "epoch": 1.97, "grad_norm": 0.6515581655506689, "learning_rate": 5.875728294346128e-09, "loss": 0.4452, "step": 15463 }, { "epoch": 1.97, "grad_norm": 0.5531404841108503, "learning_rate": 5.825838184803645e-09, "loss": 0.4218, "step": 15464 }, { "epoch": 1.97, "grad_norm": 0.6521565471825053, "learning_rate": 5.776160661296604e-09, "loss": 0.477, "step": 15465 }, { "epoch": 1.97, "grad_norm": 0.6533604321682576, "learning_rate": 5.726695725938869e-09, "loss": 0.4763, "step": 15466 }, { "epoch": 1.97, "grad_norm": 0.5963840181705645, "learning_rate": 5.677443380835979e-09, "loss": 0.4293, "step": 15467 }, { "epoch": 1.97, "grad_norm": 0.6531464106229974, "learning_rate": 5.628403628085144e-09, "loss": 0.4748, "step": 15468 }, { "epoch": 1.97, "grad_norm": 0.6454850603546034, "learning_rate": 5.579576469773585e-09, "loss": 0.4157, "step": 15469 }, { "epoch": 1.97, "grad_norm": 0.5725170746777527, "learning_rate": 5.530961907979082e-09, "loss": 0.4058, "step": 15470 }, { "epoch": 1.97, "grad_norm": 0.6521757296415698, "learning_rate": 5.482559944771648e-09, "loss": 0.4636, "step": 15471 }, { "epoch": 1.97, "grad_norm": 1.4702547180046146, "learning_rate": 5.434370582211856e-09, "loss": 0.4438, "step": 15472 }, { "epoch": 1.97, "grad_norm": 0.609859695814063, "learning_rate": 5.386393822350289e-09, "loss": 0.4751, "step": 15473 }, { "epoch": 1.97, "grad_norm": 0.6575833418183686, "learning_rate": 5.3386296672292e-09, "loss": 0.4435, "step": 15474 }, { "epoch": 1.97, "grad_norm": 0.6167605085011579, "learning_rate": 5.29107811888252e-09, "loss": 0.4745, "step": 15475 }, { "epoch": 1.97, "grad_norm": 0.6728242193817675, "learning_rate": 5.243739179333074e-09, "loss": 0.4476, "step": 15476 }, { "epoch": 1.97, "grad_norm": 0.7437500397199031, "learning_rate": 5.196612850597582e-09, "loss": 0.5059, "step": 15477 }, { "epoch": 1.97, "grad_norm": 0.7372479431961402, "learning_rate": 5.149699134680553e-09, "loss": 0.4677, "step": 15478 }, { "epoch": 1.97, "grad_norm": 0.616694721621885, "learning_rate": 5.102998033579831e-09, "loss": 0.4804, "step": 15479 }, { "epoch": 1.97, "grad_norm": 0.7193644565276939, "learning_rate": 5.056509549282718e-09, "loss": 0.5307, "step": 15480 }, { "epoch": 1.97, "grad_norm": 0.7103474738076568, "learning_rate": 5.010233683768739e-09, "loss": 0.5368, "step": 15481 }, { "epoch": 1.97, "grad_norm": 0.7650014277909589, "learning_rate": 4.964170439007432e-09, "loss": 0.4887, "step": 15482 }, { "epoch": 1.97, "grad_norm": 0.591218752928957, "learning_rate": 4.918319816959449e-09, "loss": 0.4407, "step": 15483 }, { "epoch": 1.97, "grad_norm": 0.5700632527235814, "learning_rate": 4.872681819576564e-09, "loss": 0.411, "step": 15484 }, { "epoch": 1.97, "grad_norm": 0.6484060008471071, "learning_rate": 4.827256448801665e-09, "loss": 0.4373, "step": 15485 }, { "epoch": 1.97, "grad_norm": 0.6461203590954838, "learning_rate": 4.782043706568207e-09, "loss": 0.4677, "step": 15486 }, { "epoch": 1.97, "grad_norm": 0.7250940806805084, "learning_rate": 4.737043594800761e-09, "loss": 0.5472, "step": 15487 }, { "epoch": 1.97, "grad_norm": 0.6982208626015444, "learning_rate": 4.6922561154144616e-09, "loss": 0.4816, "step": 15488 }, { "epoch": 1.97, "grad_norm": 0.6293054080717639, "learning_rate": 4.647681270316118e-09, "loss": 0.4616, "step": 15489 }, { "epoch": 1.97, "grad_norm": 0.626189923830193, "learning_rate": 4.6033190614031e-09, "loss": 0.4411, "step": 15490 }, { "epoch": 1.97, "grad_norm": 0.6566370181024924, "learning_rate": 4.559169490563897e-09, "loss": 0.4844, "step": 15491 }, { "epoch": 1.97, "grad_norm": 0.8120906741192021, "learning_rate": 4.515232559678117e-09, "loss": 0.4659, "step": 15492 }, { "epoch": 1.97, "grad_norm": 0.6604216676203187, "learning_rate": 4.471508270614821e-09, "loss": 0.498, "step": 15493 }, { "epoch": 1.97, "grad_norm": 0.6587592626195641, "learning_rate": 4.427996625236408e-09, "loss": 0.4454, "step": 15494 }, { "epoch": 1.97, "grad_norm": 0.5734305973101728, "learning_rate": 4.384697625394729e-09, "loss": 0.4094, "step": 15495 }, { "epoch": 1.97, "grad_norm": 0.627064411989165, "learning_rate": 4.3416112729327556e-09, "loss": 0.4113, "step": 15496 }, { "epoch": 1.97, "grad_norm": 0.6374177168690024, "learning_rate": 4.298737569684574e-09, "loss": 0.4244, "step": 15497 }, { "epoch": 1.97, "grad_norm": 0.6092671685716567, "learning_rate": 4.256076517474839e-09, "loss": 0.514, "step": 15498 }, { "epoch": 1.97, "grad_norm": 0.9168220244348965, "learning_rate": 4.213628118119872e-09, "loss": 0.5047, "step": 15499 }, { "epoch": 1.97, "grad_norm": 0.619726952734395, "learning_rate": 4.171392373426564e-09, "loss": 0.4223, "step": 15500 }, { "epoch": 1.97, "grad_norm": 0.8028483995662326, "learning_rate": 4.129369285192919e-09, "loss": 0.4838, "step": 15501 }, { "epoch": 1.97, "grad_norm": 0.9574708704598934, "learning_rate": 4.087558855206952e-09, "loss": 0.5642, "step": 15502 }, { "epoch": 1.98, "grad_norm": 0.7188255109238451, "learning_rate": 4.045961085249462e-09, "loss": 0.5096, "step": 15503 }, { "epoch": 1.98, "grad_norm": 0.7114869383750543, "learning_rate": 4.004575977090697e-09, "loss": 0.4919, "step": 15504 }, { "epoch": 1.98, "grad_norm": 0.7510786383712943, "learning_rate": 3.9634035324920276e-09, "loss": 0.5687, "step": 15505 }, { "epoch": 1.98, "grad_norm": 0.7351002246321302, "learning_rate": 3.92244375320594e-09, "loss": 0.5602, "step": 15506 }, { "epoch": 1.98, "grad_norm": 0.6817229237547137, "learning_rate": 3.881696640976596e-09, "loss": 0.4747, "step": 15507 }, { "epoch": 1.98, "grad_norm": 0.6450276951496237, "learning_rate": 3.8411621975381615e-09, "loss": 0.4392, "step": 15508 }, { "epoch": 1.98, "grad_norm": 0.6197451145295488, "learning_rate": 3.8008404246159256e-09, "loss": 0.4469, "step": 15509 }, { "epoch": 1.98, "grad_norm": 0.6747691924674576, "learning_rate": 3.760731323926292e-09, "loss": 0.4064, "step": 15510 }, { "epoch": 1.98, "grad_norm": 0.7635359896547521, "learning_rate": 3.720834897176784e-09, "loss": 0.4268, "step": 15511 }, { "epoch": 1.98, "grad_norm": 0.7417785772447753, "learning_rate": 3.681151146064932e-09, "loss": 0.504, "step": 15512 }, { "epoch": 1.98, "grad_norm": 0.7718289036278418, "learning_rate": 3.641680072281051e-09, "loss": 0.506, "step": 15513 }, { "epoch": 1.98, "grad_norm": 0.8244534208624928, "learning_rate": 3.6024216775049082e-09, "loss": 0.5494, "step": 15514 }, { "epoch": 1.98, "grad_norm": 0.7756824588190223, "learning_rate": 3.5633759634073893e-09, "loss": 0.5071, "step": 15515 }, { "epoch": 1.98, "grad_norm": 0.8119800066916428, "learning_rate": 3.5245429316499436e-09, "loss": 0.4525, "step": 15516 }, { "epoch": 1.98, "grad_norm": 0.8269203800279461, "learning_rate": 3.485922583887358e-09, "loss": 0.512, "step": 15517 }, { "epoch": 1.98, "grad_norm": 0.7579298322107699, "learning_rate": 3.447514921761652e-09, "loss": 0.4949, "step": 15518 }, { "epoch": 1.98, "grad_norm": 0.624727004219979, "learning_rate": 3.40931994690874e-09, "loss": 0.4593, "step": 15519 }, { "epoch": 1.98, "grad_norm": 0.7675656149061428, "learning_rate": 3.3713376609545432e-09, "loss": 0.5584, "step": 15520 }, { "epoch": 1.98, "grad_norm": 0.784676331627015, "learning_rate": 3.3335680655149915e-09, "loss": 0.5721, "step": 15521 }, { "epoch": 1.98, "grad_norm": 0.6867066590892169, "learning_rate": 3.2960111621987977e-09, "loss": 0.5114, "step": 15522 }, { "epoch": 1.98, "grad_norm": 0.6218848722337349, "learning_rate": 3.258666952604128e-09, "loss": 0.4234, "step": 15523 }, { "epoch": 1.98, "grad_norm": 0.680099674920185, "learning_rate": 3.221535438320822e-09, "loss": 0.4655, "step": 15524 }, { "epoch": 1.98, "grad_norm": 0.6674053334620355, "learning_rate": 3.1846166209292815e-09, "loss": 0.4901, "step": 15525 }, { "epoch": 1.98, "grad_norm": 0.8626624101411562, "learning_rate": 3.147910502001028e-09, "loss": 0.4761, "step": 15526 }, { "epoch": 1.98, "grad_norm": 0.6453976315874081, "learning_rate": 3.1114170830987e-09, "loss": 0.505, "step": 15527 }, { "epoch": 1.98, "grad_norm": 0.7407821371206041, "learning_rate": 3.0751363657754996e-09, "loss": 0.5514, "step": 15528 }, { "epoch": 1.98, "grad_norm": 0.8010733675220704, "learning_rate": 3.039068351575747e-09, "loss": 0.5007, "step": 15529 }, { "epoch": 1.98, "grad_norm": 0.7172985774998764, "learning_rate": 3.0032130420348803e-09, "loss": 0.5248, "step": 15530 }, { "epoch": 1.98, "grad_norm": 0.7400050326815967, "learning_rate": 2.967570438678902e-09, "loss": 0.4518, "step": 15531 }, { "epoch": 1.98, "grad_norm": 0.6920182111570137, "learning_rate": 2.9321405430254856e-09, "loss": 0.5064, "step": 15532 }, { "epoch": 1.98, "grad_norm": 0.6409755166804765, "learning_rate": 2.896923356582315e-09, "loss": 0.4223, "step": 15533 }, { "epoch": 1.98, "grad_norm": 0.7238471222944921, "learning_rate": 2.8619188808493014e-09, "loss": 0.4535, "step": 15534 }, { "epoch": 1.98, "grad_norm": 0.6842925873671077, "learning_rate": 2.8271271173152537e-09, "loss": 0.4671, "step": 15535 }, { "epoch": 1.98, "grad_norm": 0.5985659643508123, "learning_rate": 2.7925480674617646e-09, "loss": 0.4463, "step": 15536 }, { "epoch": 1.98, "grad_norm": 0.7653797333437653, "learning_rate": 2.758181732760434e-09, "loss": 0.4582, "step": 15537 }, { "epoch": 1.98, "grad_norm": 0.6644284473684977, "learning_rate": 2.7240281146750923e-09, "loss": 0.4524, "step": 15538 }, { "epoch": 1.98, "grad_norm": 0.6234093781186587, "learning_rate": 2.6900872146584657e-09, "loss": 0.4642, "step": 15539 }, { "epoch": 1.98, "grad_norm": 0.6808224688763457, "learning_rate": 2.656359034155509e-09, "loss": 0.4819, "step": 15540 }, { "epoch": 1.98, "grad_norm": 0.7635437567252165, "learning_rate": 2.622843574602296e-09, "loss": 0.5346, "step": 15541 }, { "epoch": 1.98, "grad_norm": 0.7038417378835212, "learning_rate": 2.5895408374254638e-09, "loss": 0.5097, "step": 15542 }, { "epoch": 1.98, "grad_norm": 0.7132805924101568, "learning_rate": 2.5564508240427664e-09, "loss": 0.5323, "step": 15543 }, { "epoch": 1.98, "grad_norm": 0.781155002200328, "learning_rate": 2.5235735358619672e-09, "loss": 0.5497, "step": 15544 }, { "epoch": 1.98, "grad_norm": 1.039862032666666, "learning_rate": 2.490908974283057e-09, "loss": 0.5079, "step": 15545 }, { "epoch": 1.98, "grad_norm": 0.6843445964327699, "learning_rate": 2.4584571406960356e-09, "loss": 0.5289, "step": 15546 }, { "epoch": 1.98, "grad_norm": 0.6870579917581314, "learning_rate": 2.42621803648313e-09, "loss": 0.5121, "step": 15547 }, { "epoch": 1.98, "grad_norm": 0.7113288450731012, "learning_rate": 2.3941916630160212e-09, "loss": 0.5085, "step": 15548 }, { "epoch": 1.98, "grad_norm": 0.8994582734472438, "learning_rate": 2.3623780216580626e-09, "loss": 0.5621, "step": 15549 }, { "epoch": 1.98, "grad_norm": 0.8016171974988877, "learning_rate": 2.330777113763727e-09, "loss": 0.5717, "step": 15550 }, { "epoch": 1.98, "grad_norm": 0.9579471029032215, "learning_rate": 2.2993889406774937e-09, "loss": 0.5647, "step": 15551 }, { "epoch": 1.98, "grad_norm": 0.9244671583178714, "learning_rate": 2.268213503736072e-09, "loss": 0.5066, "step": 15552 }, { "epoch": 1.98, "grad_norm": 0.816512944942824, "learning_rate": 2.237250804266733e-09, "loss": 0.5488, "step": 15553 }, { "epoch": 1.98, "grad_norm": 0.6806019496297216, "learning_rate": 2.2065008435867563e-09, "loss": 0.4994, "step": 15554 }, { "epoch": 1.98, "grad_norm": 0.7292192801250715, "learning_rate": 2.175963623005095e-09, "loss": 0.4785, "step": 15555 }, { "epoch": 1.98, "grad_norm": 0.5602877364713836, "learning_rate": 2.1456391438223755e-09, "loss": 0.4347, "step": 15556 }, { "epoch": 1.98, "grad_norm": 0.7027876099508841, "learning_rate": 2.1155274073286767e-09, "loss": 0.446, "step": 15557 }, { "epoch": 1.98, "grad_norm": 0.6663318630659315, "learning_rate": 2.0856284148057516e-09, "loss": 0.5192, "step": 15558 }, { "epoch": 1.98, "grad_norm": 0.703341408895381, "learning_rate": 2.055942167527025e-09, "loss": 0.4686, "step": 15559 }, { "epoch": 1.98, "grad_norm": 0.7170155103906695, "learning_rate": 2.026468666755932e-09, "loss": 0.516, "step": 15560 }, { "epoch": 1.98, "grad_norm": 0.5684541164977284, "learning_rate": 1.9972079137464685e-09, "loss": 0.4334, "step": 15561 }, { "epoch": 1.98, "grad_norm": 0.7012560560684962, "learning_rate": 1.968159909744305e-09, "loss": 0.4531, "step": 15562 }, { "epoch": 1.98, "grad_norm": 0.6087526871132278, "learning_rate": 1.93932465598623e-09, "loss": 0.4444, "step": 15563 }, { "epoch": 1.98, "grad_norm": 0.6095452782759248, "learning_rate": 1.9107021537001504e-09, "loss": 0.4469, "step": 15564 }, { "epoch": 1.98, "grad_norm": 0.6307801675872631, "learning_rate": 1.882292404103425e-09, "loss": 0.5105, "step": 15565 }, { "epoch": 1.98, "grad_norm": 0.9502077209431817, "learning_rate": 1.854095408405643e-09, "loss": 0.5165, "step": 15566 }, { "epoch": 1.98, "grad_norm": 0.674841858004928, "learning_rate": 1.8261111678075095e-09, "loss": 0.4981, "step": 15567 }, { "epoch": 1.98, "grad_norm": 0.5827438854547227, "learning_rate": 1.7983396834997392e-09, "loss": 0.4384, "step": 15568 }, { "epoch": 1.98, "grad_norm": 0.8940266539403025, "learning_rate": 1.7707809566647194e-09, "loss": 0.6048, "step": 15569 }, { "epoch": 1.98, "grad_norm": 0.9220590119912564, "learning_rate": 1.743434988475956e-09, "loss": 0.6107, "step": 15570 }, { "epoch": 1.98, "grad_norm": 0.8101701704697742, "learning_rate": 1.7163017800964077e-09, "loss": 0.4814, "step": 15571 }, { "epoch": 1.98, "grad_norm": 0.65679392938921, "learning_rate": 1.6893813326818165e-09, "loss": 0.4615, "step": 15572 }, { "epoch": 1.98, "grad_norm": 0.6995338248536687, "learning_rate": 1.6626736473784878e-09, "loss": 0.4467, "step": 15573 }, { "epoch": 1.98, "grad_norm": 0.6118462294765793, "learning_rate": 1.6361787253221794e-09, "loss": 0.4439, "step": 15574 }, { "epoch": 1.98, "grad_norm": 0.640929302536141, "learning_rate": 1.6098965676414335e-09, "loss": 0.4378, "step": 15575 }, { "epoch": 1.98, "grad_norm": 0.5945204147700056, "learning_rate": 1.5838271754547996e-09, "loss": 0.4415, "step": 15576 }, { "epoch": 1.98, "grad_norm": 0.8421400585443582, "learning_rate": 1.5579705498719455e-09, "loss": 0.505, "step": 15577 }, { "epoch": 1.98, "grad_norm": 0.5845640358877839, "learning_rate": 1.5323266919931024e-09, "loss": 0.4361, "step": 15578 }, { "epoch": 1.98, "grad_norm": 0.6264724336315702, "learning_rate": 1.5068956029107295e-09, "loss": 0.481, "step": 15579 }, { "epoch": 1.98, "grad_norm": 0.5992543238921089, "learning_rate": 1.4816772837072945e-09, "loss": 0.4509, "step": 15580 }, { "epoch": 1.98, "grad_norm": 0.6214176294401098, "learning_rate": 1.4566717354552728e-09, "loss": 0.4794, "step": 15581 }, { "epoch": 1.99, "grad_norm": 0.7512609505498142, "learning_rate": 1.4318789592199233e-09, "loss": 0.4772, "step": 15582 }, { "epoch": 1.99, "grad_norm": 0.6372094831895284, "learning_rate": 1.4072989560565132e-09, "loss": 0.4626, "step": 15583 }, { "epoch": 1.99, "grad_norm": 0.6232192120920588, "learning_rate": 1.3829317270108722e-09, "loss": 0.4627, "step": 15584 }, { "epoch": 1.99, "grad_norm": 0.5443182656853527, "learning_rate": 1.358777273120504e-09, "loss": 0.4155, "step": 15585 }, { "epoch": 1.99, "grad_norm": 0.6184316920999691, "learning_rate": 1.3348355954140302e-09, "loss": 0.46, "step": 15586 }, { "epoch": 1.99, "grad_norm": 0.8551400260235651, "learning_rate": 1.3111066949100804e-09, "loss": 0.4633, "step": 15587 }, { "epoch": 1.99, "grad_norm": 0.5968350443131225, "learning_rate": 1.2875905726184023e-09, "loss": 0.4027, "step": 15588 }, { "epoch": 1.99, "grad_norm": 0.6626666558913741, "learning_rate": 1.2642872295404175e-09, "loss": 0.4738, "step": 15589 }, { "epoch": 1.99, "grad_norm": 0.760782007786369, "learning_rate": 1.2411966666686648e-09, "loss": 0.4834, "step": 15590 }, { "epoch": 1.99, "grad_norm": 0.7298150472548615, "learning_rate": 1.2183188849845816e-09, "loss": 0.454, "step": 15591 }, { "epoch": 1.99, "grad_norm": 0.5985839780519197, "learning_rate": 1.1956538854634992e-09, "loss": 0.4719, "step": 15592 }, { "epoch": 1.99, "grad_norm": 0.9701910904263297, "learning_rate": 1.1732016690690906e-09, "loss": 0.4876, "step": 15593 }, { "epoch": 1.99, "grad_norm": 0.6099589761121897, "learning_rate": 1.1509622367578132e-09, "loss": 0.4491, "step": 15594 }, { "epoch": 1.99, "grad_norm": 0.7835962278499683, "learning_rate": 1.1289355894761323e-09, "loss": 0.4863, "step": 15595 }, { "epoch": 1.99, "grad_norm": 0.6880095308945523, "learning_rate": 1.1071217281610758e-09, "loss": 0.4772, "step": 15596 }, { "epoch": 1.99, "grad_norm": 0.6361460173723696, "learning_rate": 1.0855206537419006e-09, "loss": 0.4896, "step": 15597 }, { "epoch": 1.99, "grad_norm": 0.6317580868881565, "learning_rate": 1.0641323671384263e-09, "loss": 0.4758, "step": 15598 }, { "epoch": 1.99, "grad_norm": 0.7359939715409596, "learning_rate": 1.0429568692599256e-09, "loss": 0.5505, "step": 15599 }, { "epoch": 1.99, "grad_norm": 0.7809403785693758, "learning_rate": 1.021994161008455e-09, "loss": 0.5093, "step": 15600 }, { "epoch": 1.99, "grad_norm": 0.6749154371535068, "learning_rate": 1.0012442432760782e-09, "loss": 0.4455, "step": 15601 }, { "epoch": 1.99, "grad_norm": 0.7580322578901564, "learning_rate": 9.807071169465333e-10, "loss": 0.5055, "step": 15602 }, { "epoch": 1.99, "grad_norm": 0.9036521290599435, "learning_rate": 9.603827828935652e-10, "loss": 0.5127, "step": 15603 }, { "epoch": 1.99, "grad_norm": 0.7010103610989801, "learning_rate": 9.402712419825932e-10, "loss": 0.4913, "step": 15604 }, { "epoch": 1.99, "grad_norm": 0.6259332114238544, "learning_rate": 9.20372495069044e-10, "loss": 0.4516, "step": 15605 }, { "epoch": 1.99, "grad_norm": 0.6156222505783443, "learning_rate": 9.00686543001128e-10, "loss": 0.4336, "step": 15606 }, { "epoch": 1.99, "grad_norm": 0.6952670234283954, "learning_rate": 8.812133866159534e-10, "loss": 0.4603, "step": 15607 }, { "epoch": 1.99, "grad_norm": 0.7718877398460587, "learning_rate": 8.619530267423015e-10, "loss": 0.4993, "step": 15608 }, { "epoch": 1.99, "grad_norm": 0.6792640903403478, "learning_rate": 8.429054642006273e-10, "loss": 0.4726, "step": 15609 }, { "epoch": 1.99, "grad_norm": 0.7531646785923019, "learning_rate": 8.240706998013937e-10, "loss": 0.5563, "step": 15610 }, { "epoch": 1.99, "grad_norm": 0.5526047271174456, "learning_rate": 8.054487343467366e-10, "loss": 0.4497, "step": 15611 }, { "epoch": 1.99, "grad_norm": 0.6317830063090688, "learning_rate": 7.870395686282451e-10, "loss": 0.5172, "step": 15612 }, { "epoch": 1.99, "grad_norm": 0.6817907366604177, "learning_rate": 7.688432034308469e-10, "loss": 0.5319, "step": 15613 }, { "epoch": 1.99, "grad_norm": 0.7379297428466698, "learning_rate": 7.508596395283673e-10, "loss": 0.4912, "step": 15614 }, { "epoch": 1.99, "grad_norm": 0.6032868755493527, "learning_rate": 7.330888776868605e-10, "loss": 0.4583, "step": 15615 }, { "epoch": 1.99, "grad_norm": 0.6444696828922737, "learning_rate": 7.155309186618331e-10, "loss": 0.4162, "step": 15616 }, { "epoch": 1.99, "grad_norm": 0.5544914690410722, "learning_rate": 6.981857632015753e-10, "loss": 0.4067, "step": 15617 }, { "epoch": 1.99, "grad_norm": 0.6554873134957288, "learning_rate": 6.810534120438306e-10, "loss": 0.4639, "step": 15618 }, { "epoch": 1.99, "grad_norm": 0.59805051168263, "learning_rate": 6.641338659180152e-10, "loss": 0.4233, "step": 15619 }, { "epoch": 1.99, "grad_norm": 0.6750675839454923, "learning_rate": 6.47427125544664e-10, "loss": 0.4814, "step": 15620 }, { "epoch": 1.99, "grad_norm": 0.5748993317132618, "learning_rate": 6.309331916348749e-10, "loss": 0.4423, "step": 15621 }, { "epoch": 1.99, "grad_norm": 0.7006065791599998, "learning_rate": 6.146520648903088e-10, "loss": 0.539, "step": 15622 }, { "epoch": 1.99, "grad_norm": 0.5944883544115594, "learning_rate": 5.985837460042998e-10, "loss": 0.5076, "step": 15623 }, { "epoch": 1.99, "grad_norm": 0.8079258449731201, "learning_rate": 5.827282356607456e-10, "loss": 0.548, "step": 15624 }, { "epoch": 1.99, "grad_norm": 0.8368729758880784, "learning_rate": 5.670855345346615e-10, "loss": 0.5211, "step": 15625 }, { "epoch": 1.99, "grad_norm": 0.6262202366938825, "learning_rate": 5.516556432916264e-10, "loss": 0.4558, "step": 15626 }, { "epoch": 1.99, "grad_norm": 0.6830778612531389, "learning_rate": 5.364385625888924e-10, "loss": 0.3986, "step": 15627 }, { "epoch": 1.99, "grad_norm": 0.7235664683566081, "learning_rate": 5.214342930737193e-10, "loss": 0.4831, "step": 15628 }, { "epoch": 1.99, "grad_norm": 0.7513574796405303, "learning_rate": 5.066428353850405e-10, "loss": 0.5252, "step": 15629 }, { "epoch": 1.99, "grad_norm": 0.7668818318639198, "learning_rate": 4.920641901529078e-10, "loss": 0.5239, "step": 15630 }, { "epoch": 1.99, "grad_norm": 0.6181001351554519, "learning_rate": 4.776983579968252e-10, "loss": 0.4558, "step": 15631 }, { "epoch": 1.99, "grad_norm": 0.7127637072296665, "learning_rate": 4.635453395296363e-10, "loss": 0.5091, "step": 15632 }, { "epoch": 1.99, "grad_norm": 0.7207195808610322, "learning_rate": 4.496051353525266e-10, "loss": 0.551, "step": 15633 }, { "epoch": 1.99, "grad_norm": 0.7933355248494677, "learning_rate": 4.358777460600205e-10, "loss": 0.56, "step": 15634 }, { "epoch": 1.99, "grad_norm": 0.6327614470687745, "learning_rate": 4.223631722355404e-10, "loss": 0.4646, "step": 15635 }, { "epoch": 1.99, "grad_norm": 0.6644963822691434, "learning_rate": 4.090614144547367e-10, "loss": 0.4768, "step": 15636 }, { "epoch": 1.99, "grad_norm": 0.6607952861622112, "learning_rate": 3.9597247328382325e-10, "loss": 0.4655, "step": 15637 }, { "epoch": 1.99, "grad_norm": 0.569126194134559, "learning_rate": 3.8309634927957695e-10, "loss": 0.3714, "step": 15638 }, { "epoch": 1.99, "grad_norm": 0.6085774909984473, "learning_rate": 3.7043304299100303e-10, "loss": 0.4655, "step": 15639 }, { "epoch": 1.99, "grad_norm": 0.5612881209898736, "learning_rate": 3.5798255495600453e-10, "loss": 0.4793, "step": 15640 }, { "epoch": 1.99, "grad_norm": 0.5508503451014122, "learning_rate": 3.457448857052681e-10, "loss": 0.4006, "step": 15641 }, { "epoch": 1.99, "grad_norm": 0.6911027376461412, "learning_rate": 3.337200357594883e-10, "loss": 0.4649, "step": 15642 }, { "epoch": 1.99, "grad_norm": 0.7155837762410707, "learning_rate": 3.2190800563103307e-10, "loss": 0.4473, "step": 15643 }, { "epoch": 1.99, "grad_norm": 0.6535262804775495, "learning_rate": 3.1030879582172323e-10, "loss": 0.4738, "step": 15644 }, { "epoch": 1.99, "grad_norm": 0.782179796350839, "learning_rate": 2.989224068261631e-10, "loss": 0.5426, "step": 15645 }, { "epoch": 1.99, "grad_norm": 0.6369420137072925, "learning_rate": 2.8774883912896513e-10, "loss": 0.4796, "step": 15646 }, { "epoch": 1.99, "grad_norm": 0.8402243182155162, "learning_rate": 2.7678809320530466e-10, "loss": 0.5536, "step": 15647 }, { "epoch": 1.99, "grad_norm": 0.8476203388697416, "learning_rate": 2.6604016952147537e-10, "loss": 0.5394, "step": 15648 }, { "epoch": 1.99, "grad_norm": 0.6254833898307344, "learning_rate": 2.5550506853599944e-10, "loss": 0.4753, "step": 15649 }, { "epoch": 1.99, "grad_norm": 0.7505157641073964, "learning_rate": 2.451827906968518e-10, "loss": 0.5482, "step": 15650 }, { "epoch": 1.99, "grad_norm": 0.7477072054093411, "learning_rate": 2.350733364431257e-10, "loss": 0.4546, "step": 15651 }, { "epoch": 1.99, "grad_norm": 0.7907027794498036, "learning_rate": 2.2517670620503252e-10, "loss": 0.4569, "step": 15652 }, { "epoch": 1.99, "grad_norm": 1.3685223452604849, "learning_rate": 2.1549290040445703e-10, "loss": 0.5065, "step": 15653 }, { "epoch": 1.99, "grad_norm": 1.1004366142535096, "learning_rate": 2.0602191945329198e-10, "loss": 0.4938, "step": 15654 }, { "epoch": 1.99, "grad_norm": 0.581383065111869, "learning_rate": 1.967637637551034e-10, "loss": 0.4559, "step": 15655 }, { "epoch": 1.99, "grad_norm": 0.5650531967408814, "learning_rate": 1.8771843370291032e-10, "loss": 0.4001, "step": 15656 }, { "epoch": 1.99, "grad_norm": 0.7047479681773297, "learning_rate": 1.7888592968307028e-10, "loss": 0.4669, "step": 15657 }, { "epoch": 1.99, "grad_norm": 0.7971732853105995, "learning_rate": 1.702662520708387e-10, "loss": 0.5183, "step": 15658 }, { "epoch": 1.99, "grad_norm": 0.851521966625726, "learning_rate": 1.6185940123258913e-10, "loss": 0.5621, "step": 15659 }, { "epoch": 2.0, "grad_norm": 0.7279155691046868, "learning_rate": 1.536653775274788e-10, "loss": 0.5276, "step": 15660 }, { "epoch": 2.0, "grad_norm": 0.8452519140295277, "learning_rate": 1.4568418130300744e-10, "loss": 0.5562, "step": 15661 }, { "epoch": 2.0, "grad_norm": 0.6247525741162779, "learning_rate": 1.3791581290001354e-10, "loss": 0.5108, "step": 15662 }, { "epoch": 2.0, "grad_norm": 0.770994682301271, "learning_rate": 1.3036027264823338e-10, "loss": 0.5534, "step": 15663 }, { "epoch": 2.0, "grad_norm": 0.8120223885865211, "learning_rate": 1.2301756087018668e-10, "loss": 0.5455, "step": 15664 }, { "epoch": 2.0, "grad_norm": 0.7149145690182054, "learning_rate": 1.1588767787784616e-10, "loss": 0.5345, "step": 15665 }, { "epoch": 2.0, "grad_norm": 0.778193833156261, "learning_rate": 1.089706239748578e-10, "loss": 0.4349, "step": 15666 }, { "epoch": 2.0, "grad_norm": 0.6680792262667913, "learning_rate": 1.0226639945543071e-10, "loss": 0.5106, "step": 15667 }, { "epoch": 2.0, "grad_norm": 1.039636831320422, "learning_rate": 9.57750046048922e-11, "loss": 0.4807, "step": 15668 }, { "epoch": 2.0, "grad_norm": 0.6073776570154549, "learning_rate": 8.94964397002429e-11, "loss": 0.4618, "step": 15669 }, { "epoch": 2.0, "grad_norm": 0.8771337979891524, "learning_rate": 8.343070500793637e-11, "loss": 0.5549, "step": 15670 }, { "epoch": 2.0, "grad_norm": 0.6214516749000534, "learning_rate": 7.757780078720967e-11, "loss": 0.5326, "step": 15671 }, { "epoch": 2.0, "grad_norm": 0.7769165634629933, "learning_rate": 7.193772728564253e-11, "loss": 0.4762, "step": 15672 }, { "epoch": 2.0, "grad_norm": 0.5590219651802202, "learning_rate": 6.651048474470845e-11, "loss": 0.4173, "step": 15673 }, { "epoch": 2.0, "grad_norm": 0.672680073273175, "learning_rate": 6.129607339477872e-11, "loss": 0.4732, "step": 15674 }, { "epoch": 2.0, "grad_norm": 0.6593641233545994, "learning_rate": 5.629449345845306e-11, "loss": 0.4717, "step": 15675 }, { "epoch": 2.0, "grad_norm": 0.6662520570294613, "learning_rate": 5.150574514778406e-11, "loss": 0.4465, "step": 15676 }, { "epoch": 2.0, "grad_norm": 0.5777940356680464, "learning_rate": 4.692982866649765e-11, "loss": 0.4853, "step": 15677 }, { "epoch": 2.0, "grad_norm": 0.6671590512777095, "learning_rate": 4.2566744210548184e-11, "loss": 0.4624, "step": 15678 }, { "epoch": 2.0, "grad_norm": 0.631210035250592, "learning_rate": 3.8416491964787806e-11, "loss": 0.458, "step": 15679 }, { "epoch": 2.0, "grad_norm": 0.627119627581415, "learning_rate": 3.4479072105741975e-11, "loss": 0.4722, "step": 15680 }, { "epoch": 2.0, "grad_norm": 0.7308190934689508, "learning_rate": 3.0754484801609475e-11, "loss": 0.5328, "step": 15681 }, { "epoch": 2.0, "grad_norm": 0.7817091133716091, "learning_rate": 2.724273021004198e-11, "loss": 0.5342, "step": 15682 }, { "epoch": 2.0, "grad_norm": 0.768948629788117, "learning_rate": 2.394380848202982e-11, "loss": 0.4726, "step": 15683 }, { "epoch": 2.0, "grad_norm": 0.6194531904625963, "learning_rate": 2.085771975635087e-11, "loss": 0.4739, "step": 15684 }, { "epoch": 2.0, "grad_norm": 0.8119597641055003, "learning_rate": 1.7984464165676784e-11, "loss": 0.5263, "step": 15685 }, { "epoch": 2.0, "grad_norm": 0.7455817504882538, "learning_rate": 1.5324041831576985e-11, "loss": 0.5486, "step": 15686 }, { "epoch": 2.0, "grad_norm": 1.0379938466352296, "learning_rate": 1.287645286729422e-11, "loss": 0.5169, "step": 15687 }, { "epoch": 2.0, "grad_norm": 0.7557173313525261, "learning_rate": 1.0641697377189453e-11, "loss": 0.486, "step": 15688 }, { "epoch": 2.0, "grad_norm": 0.665462978298376, "learning_rate": 8.619775456741864e-12, "loss": 0.4819, "step": 15689 }, { "epoch": 2.0, "grad_norm": 0.6077146291192674, "learning_rate": 6.8106871908835136e-12, "loss": 0.4285, "step": 15690 }, { "epoch": 2.0, "grad_norm": 0.5971349282284016, "learning_rate": 5.214432657885127e-12, "loss": 0.4781, "step": 15691 }, { "epoch": 2.0, "grad_norm": 0.573991926935016, "learning_rate": 3.831011925470307e-12, "loss": 0.4487, "step": 15692 }, { "epoch": 2.0, "grad_norm": 0.6167799000716518, "learning_rate": 2.6604250519257635e-12, "loss": 0.5153, "step": 15693 }, { "epoch": 2.0, "grad_norm": 0.6254240545996209, "learning_rate": 1.7026720877666436e-12, "loss": 0.4456, "step": 15694 }, { "epoch": 2.0, "grad_norm": 0.7405725933388564, "learning_rate": 9.577530729609764e-13, "loss": 0.4766, "step": 15695 }, { "epoch": 2.0, "grad_norm": 0.6853175883616636, "learning_rate": 4.256680402603408e-13, "loss": 0.5059, "step": 15696 }, { "epoch": 2.0, "grad_norm": 0.6915451905342359, "learning_rate": 1.0641701131408611e-13, "loss": 0.4825, "step": 15697 }, { "epoch": 2.0, "grad_norm": 0.7079468415225337, "learning_rate": 0.0, "loss": 0.4065, "step": 15698 }, { "epoch": 2.0, "step": 15698, "total_flos": 7380512543408128.0, "train_loss": 0.5341869555171667, "train_runtime": 85868.5202, "train_samples_per_second": 23.402, "train_steps_per_second": 0.183 } ], "logging_steps": 1.0, "max_steps": 15698, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "total_flos": 7380512543408128.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }